Sync from internal Google development.

Many improvements, too many to mention.

One significant perf regression warrants investigation:

  omitfp.parsetoproto2_googlemessage1.upb_jit: 343 -> 252 (-26.53%)
  plain.parsetoproto2_googlemessage1.upb_jit: 334 -> 251 (-24.85%)

A 25% regression for this benchmark is bad, but since I don't think any fundamental design issue caused it, I'm going to go ahead with the commit anyway and investigate and fix it later. Other benchmarks were neutral or showed slight improvement.
13 years ago · 86bad61b76
parent db59a5198f
commit 86bad61b76
49 changed files with 4584 additions and 3633 deletions
--- a/83
+++ b/83
@ -83,11 +83,15 @@ deps: Makefile $(ALLSRC)
 CORE= \
  upb/upb.c \
  upb/handlers.c \
-  upb/descriptor.c \
+  upb/descriptor/reader.c \
  upb/table.c \
+  upb/refcount.c \
  upb/def.c \
  upb/msg.c \
  upb/bytestream.c \
+  bindings/cpp/upb/proto2_bridge.cc \
+
+# TODO: the proto2 bridge should be built as a separate library.

 # Library for the protocol buffer format (both text and binary).
 PB= \
@ -122,8 +126,9 @@ LIBUPB_PIC=upb/libupb_pic.a
 lib: $(LIBUPB)


-OBJ=$(patsubst %.c,%.o,$(SRC))
-PICOBJ=$(patsubst %.c,%.lo,$(SRC))
+# Map each source file to its object file.  Sources are filtered by extension
+# first because patsubst passes non-matching words through unchanged, which
+# would otherwise leave raw .c/.cc file names in the object lists.
+OBJ=$(patsubst %.c,%.o,$(filter %.c,$(SRC))) $(patsubst %.cc,%.o,$(filter %.cc,$(SRC)))
+PICOBJ=$(patsubst %.c,%.lo,$(filter %.c,$(SRC))) $(patsubst %.cc,%.lo,$(filter %.cc,$(SRC)))
+

 ifdef USE_JIT
 upb/pb/decoder.o upb/pb/decoder.lo: upb/pb/decoder_x64.h
@ -139,10 +144,18 @@ $(LIBUPB_PIC): $(PICOBJ)
 	$(E) CC $<
 	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<

+%.o : %.cc
+	$(E) CXX $<
+	$(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $<
+
 %.lo : %.c
 	$(E) 'CC -fPIC' $<
 	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $< -fPIC

+# PIC variant of the C++ compile rule.  It must produce %.lo (as the C PIC
+# rule does) so that it does not redefine the non-PIC %.o rule.
+%.lo : %.cc
+	$(E) 'CXX -fPIC' $<
+	$(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $< -fPIC
+
 # Override the optimization level for def.o, because it is not in the
 # critical path but gets very large when -O3 is used.
 upb/def.o: upb/def.c
@ -197,47 +210,39 @@ tests/test.proto.pb: tests/test.proto
 SIMPLE_TESTS= \
  tests/test_def \
  tests/test_varint \
-  tests/tests \
-
-# Too many tests in this binary to run Valgrind (it takes minutes).
-SLOW_TESTS= \
-  tests/test_decoder \

 SIMPLE_CXX_TESTS= \
  tests/test_table \
  tests/test_cpp \
+  tests/test_decoder \

 VARIADIC_TESTS= \
  tests/t.test_vs_proto2.googlemessage1 \
  tests/t.test_vs_proto2.googlemessage2 \

-TESTS=$(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS) $(VARIADIC_TESTS) $(SLOW_TESTS)
-tests: $(TESTS)
+TESTS=$(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS) $(VARIADIC_TESTS)
+
+
+tests: $(TESTS) $(INTERACTIVE_TESTS)
 $(TESTS): $(LIBUPB)
-tests/tests: tests/test.proto.pb
+tests/test_def: tests/test.proto.pb

 $(SIMPLE_TESTS): % : %.c
 	$(E) CC $<
 	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ $< $(LIBUPB)

-VALGRIND=valgrind --leak-check=full --error-exitcode=1 
+VALGRIND=valgrind --leak-check=full --error-exitcode=1
 test: tests
 	@echo Running all tests under valgrind.
 	@set -e  # Abort on error.
 	@for test in $(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS); do \
 	  if [ -x ./$$test ] ; then \
-	    echo !!! $(VALGRIND) ./$$test tests/test.proto.pb; \
+	    echo !!! $(VALGRIND) ./$$test; \
 	    $(VALGRIND) ./$$test tests/test.proto.pb || exit 1; \
 	  fi \
 	done;
-	@for test in "$(SLOW_TESTS)"; do \
-	  if [ -x ./$$test ] ; then \
-	    echo !!! ./$$test; \
-	    ./$$test || exit 1; \
-	  fi \
-	done;
-	@$(VALGRIND) tests/t.test_vs_proto2.googlemessage1 benchmarks/google_messages.proto.pb benchmarks/google_message1.dat
-	@$(VALGRIND) tests/t.test_vs_proto2.googlemessage2 benchmarks/google_messages.proto.pb benchmarks/google_message2.dat
+	@$(VALGRIND) ./tests/t.test_vs_proto2.googlemessage1 benchmarks/google_message1.dat || exit 1;
+	@$(VALGRIND) ./tests/t.test_vs_proto2.googlemessage2 benchmarks/google_message2.dat || exit 1;
 	@echo "All tests passed!"

 tests/t.test_vs_proto2.googlemessage1 \
@ -273,15 +278,11 @@ tests/tests: upb/libupb.a
 # Benchmarks
 UPB_BENCHMARKS=benchmarks/b.parsestream_googlemessage1.upb_table \
               benchmarks/b.parsestream_googlemessage2.upb_table \
-               benchmarks/b.parsetostruct_googlemessage1.upb_table_byval \
-               benchmarks/b.parsetostruct_googlemessage2.upb_table_byval \

 ifdef USE_JIT
 UPB_BENCHMARKS += \
               benchmarks/b.parsestream_googlemessage1.upb_jit \
               benchmarks/b.parsestream_googlemessage2.upb_jit \
-               benchmarks/b.parsetostruct_googlemessage1.upb_jit_byval \
-               benchmarks/b.parsetostruct_googlemessage2.upb_jit_byval \
               benchmarks/b.parsetoproto2_googlemessage1.upb_jit \
               benchmarks/b.parsetoproto2_googlemessage2.upb_jit
 endif
@ -318,21 +319,21 @@ benchmarks/google_messages.pb.cc: benchmarks/google_messages.proto
 # want to make these command-line parameters -- it makes it more annoying to
 # debug or profile them.

-benchmarks/b.parsetostruct_googlemessage1.upb_table_byval \
-benchmarks/b.parsetostruct_googlemessage2.upb_table_byval: \
+benchmarks/b.parsetostruct_googlemessage1.upb_table \
+benchmarks/b.parsetostruct_googlemessage2.upb_table: \
    benchmarks/parsetostruct.upb.c $(LIBUPB) benchmarks/google_messages.proto.pb
-	$(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage1, byval, nojit)'
-	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage1.upb_table_byval $< \
+	$(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage1, nojit)'
+	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage1.upb_table $< \
 	  -DMESSAGE_NAME=\"benchmarks.SpeedMessage1\" \
 	  -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \
 	  -DMESSAGE_FILE=\"google_message1.dat\" \
-	  -DBYREF=false -DJIT=false $(LIBUPB)
-	$(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage2, byref, nojit)'
-	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage2.upb_table_byval $< \
+	  -DJIT=false $(LIBUPB)
+	$(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage2, nojit)'
+	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage2.upb_table $< \
 	  -DMESSAGE_NAME=\"benchmarks.SpeedMessage2\" \
 	  -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \
 	  -DMESSAGE_FILE=\"google_message2.dat\" \
-	  -DBYREF=false -DJIT=false $(LIBUPB)
+	  -DJIT=false $(LIBUPB)

 benchmarks/b.parsestream_googlemessage1.upb_table \
 benchmarks/b.parsestream_googlemessage2.upb_table: \
@ -351,21 +352,21 @@ benchmarks/b.parsestream_googlemessage2.upb_table: \
 	  $(LIBUPB)

 ifdef USE_JIT
-benchmarks/b.parsetostruct_googlemessage1.upb_jit_byval \
-benchmarks/b.parsetostruct_googlemessage2.upb_jit_byval: \
+benchmarks/b.parsetostruct_googlemessage1.upb_jit \
+benchmarks/b.parsetostruct_googlemessage2.upb_jit: \
    benchmarks/parsetostruct.upb.c $(LIBUPB) benchmarks/google_messages.proto.pb
-	$(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage1, byref, jit)'
-	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage1.upb_jit_byval $< \
+	$(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage1, jit)'
+	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage1.upb_jit $< \
 	  -DMESSAGE_NAME=\"benchmarks.SpeedMessage1\" \
 	  -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \
 	  -DMESSAGE_FILE=\"google_message1.dat\" -DJIT=true \
-	  -DBYREF=true -DJIT=true $(LIBUPB)
-	$(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage2, byval, jit)'
-	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage2.upb_jit_byval $< \
+	  -DJIT=true $(LIBUPB)
+	$(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage2, jit)'
+	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage2.upb_jit $< \
 	  -DMESSAGE_NAME=\"benchmarks.SpeedMessage2\" \
 	  -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \
 	  -DMESSAGE_FILE=\"google_message2.dat\" -DJIT=true \
-	  -DBYREF=false -DJIT=true $(LIBUPB)
+	  -DJIT=true $(LIBUPB)

 benchmarks/b.parsestream_googlemessage1.upb_jit \
 benchmarks/b.parsestream_googlemessage2.upb_jit: \
--- a/benchmarks/google_messages.proto
+++ b/benchmarks/google_messages.proto
@ -3,6 +3,11 @@ package benchmarks;

 option optimize_for = SPEED;

+enum Foo {
+  FOO_VALUE = 1;
+  FOO_VALUE2 = 2;
+}
+
 message SpeedMessage1 {
  required string field1 = 1;
  optional string field9 = 9;
@ -45,6 +50,7 @@ message SpeedMessage1 {
  optional int32 field128 = 128 [default=0];
  optional string field129 = 129 [default="xxxxxxxxxxxxxxxxxxxxx"];
  optional int32 field131 = 131 [default=0];
+  optional Foo field132 = 132 [default=FOO_VALUE];
 }

 message SpeedMessage1SubMessage {
--- a/benchmarks/parsestream.upb.c
+++ b/benchmarks/parsestream.upb.c
@ -39,7 +39,7 @@ static bool initialize()
    return false;
  }

-  def = upb_dyncast_msgdef_const(upb_symtab_lookup(s, MESSAGE_NAME));
+  def = upb_dyncast_msgdef_const(upb_symtab_lookup(s, MESSAGE_NAME, &def));
  if(!def) {
    fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME);
    return false;
@ -68,7 +68,7 @@ static bool initialize()
 static void cleanup()
 {
  free(input_str);
-  upb_def_unref(UPB_UPCAST(def));
+  upb_def_unref(UPB_UPCAST(def), &def);
  upb_decoder_uninit(&decoder);
  upb_decoderplan_unref(plan);
  upb_stringsrc_uninit(&stringsrc);
--- a/benchmarks/parsetoproto2.upb.cc
+++ b/benchmarks/parsetoproto2.upb.cc
@ -1,320 +1,61 @@
-// This file is a crime against software engineering.  It breaks the
-// encapsulation of proto2 in numerous ways, violates the C++ standard
-// in others, and generally deserves to have comtempt and scorn heaped
-// upon it.
-//
-// Its purpose is to get an accurate benchmark for how fast upb can
-// parse into proto2 data structures.  To add proper support for this
-// functionality, proto2 would need to expose actual support for the
-// operations we are trying to perform here.
+// Tests speed of upb parsing into proto2 generated classes.

 #define __STDC_LIMIT_MACROS 1
 #include "main.c"

 #include <stdint.h>
-#include "upb/bytestream.h"
-#include "upb/def.h"
-#include "upb/msg.h"
-#include "upb/pb/decoder.h"
+#include "upb/bytestream.hpp"
+#include "upb/def.hpp"
+#include "upb/msg.hpp"
+#include "upb/pb/decoder.hpp"
 #include "upb/pb/glue.h"
-
-// Need to violate the encapsulation of GeneratedMessageReflection -- see below.
-#define private public
+#include "upb/proto2_bridge.hpp"
 #include MESSAGE_HFILE
-#include <google/protobuf/descriptor.h>
-#undef private

-static size_t len;
+const char *str;
+size_t len;
 MESSAGE_CIDENT msg[NUM_MESSAGES];
 MESSAGE_CIDENT msg2;
-static upb_stringsrc strsrc;
-static upb_decoder d;
-static const upb_msgdef *def;
-static upb_decoderplan *p;
-char *str;
-
-#define PROTO2_APPEND(type, ctype) \
-  upb_flow_t proto2_append_ ## type(void *_r, upb_value fval, upb_value val) { \
-    (void)fval; \
-    typedef google::protobuf::RepeatedField<ctype> R; \
-    R *r = (R*)_r; \
-    r->Add(upb_value_get ## type(val)); \
-    return UPB_CONTINUE; \
-  }
-
-PROTO2_APPEND(double, double)
-PROTO2_APPEND(float, float)
-PROTO2_APPEND(uint64, uint64_t)
-PROTO2_APPEND(int64, int64_t)
-PROTO2_APPEND(int32, int32_t)
-PROTO2_APPEND(uint32, uint32_t)
-PROTO2_APPEND(bool, bool)
-
-upb_flow_t proto2_setstr(void *m, upb_value fval, upb_value val) {
-  assert(m != NULL);
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  std::string **str = (std::string**)UPB_INDEX(m, f->offset, 1);
-  if (*str == f->default_ptr) *str = new std::string;
-  const upb_byteregion *reg = upb_value_getbyteregion(val);
-  size_t len;
-  (*str)->assign(
-      upb_byteregion_getptr(reg, upb_byteregion_startofs(reg), &len),
-      upb_byteregion_len(reg));
-  // XXX: only supports contiguous strings atm.
-  assert(len == upb_byteregion_len(reg));
-  return UPB_CONTINUE;
-}
-
-upb_flow_t proto2_append_str(void *_r, upb_value fval, upb_value val) {
-  assert(_r != NULL);
-  typedef google::protobuf::RepeatedPtrField<std::string> R;
-  (void)fval;
-  R *r = (R*)_r;
-  const upb_byteregion *reg = upb_value_getbyteregion(val);
-  size_t len;
-  r->Add()->assign(
-      upb_byteregion_getptr(reg, upb_byteregion_startofs(reg), &len),
-      upb_byteregion_len(reg));
-  // XXX: only supports contiguous strings atm.
-  assert(len == upb_byteregion_len(reg));
-  return UPB_CONTINUE;
-}
-
-upb_sflow_t proto2_startseq(void *m, upb_value fval) {
-  assert(m != NULL);
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  return UPB_CONTINUE_WITH(UPB_INDEX(m, f->offset, 1));
-}
-
-upb_sflow_t proto2_startsubmsg(void *m, upb_value fval) {
-  assert(m != NULL);
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  google::protobuf::Message *prototype = (google::protobuf::Message*)f->prototype;
-  void **subm = (void**)UPB_INDEX(m, f->offset, 1);
-  if (*subm == NULL || *subm == f->default_ptr)
-    *subm = prototype->New();
-  assert(*subm != NULL);
-  return UPB_CONTINUE_WITH(*subm);
-}
-
-class UpbRepeatedPtrField : public google::protobuf::internal::RepeatedPtrFieldBase {
- public:
-  class TypeHandler {
-   public:
-    typedef void Type;
-    // AddAllocated() calls this, but only if other objects are sitting
-    // around waiting for reuse, which we will not do.
-    static void Delete(Type*) { assert(false); }
-  };
-  void *Add(google::protobuf::Message *m) {
-    void *submsg = RepeatedPtrFieldBase::AddFromCleared<TypeHandler>();
-    if (!submsg) {
-      submsg = m->New();
-      RepeatedPtrFieldBase::AddAllocated<TypeHandler>(submsg);
-    }
-    return submsg;
-  }
-};
-
-upb_sflow_t proto2_startsubmsg_r(void *_r, upb_value fval) {
-  assert(_r != NULL);
-  // Compared to the other writers, this implementation is particularly sketchy.
-  // The object we are modifying is a RepeatedPtrField<SubType>*, but we can't
-  // properly declare that templated pointer because we don't have access to
-  // that type at compile-time (and wouldn't want to create a separate callback
-  // for each type anyway).  Instead we access the pointer as a
-  // RepeatedPtrFieldBase, which is indeed a superclass of RepeatedPtrField.
-  // But we can't properly declare a TypeHandler for the submessage's type,
-  // for the same reason that we can't create a RepeatedPtrField<SubType>*.
-  // Instead we treat it as a void*, and create the submessage using
-  // google::protobuf::Message::New() if we need to.
-  class TypeHandler {
-   public:
-    typedef void Type;
-  };
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  UpbRepeatedPtrField *r = (UpbRepeatedPtrField*)_r;
-  void *submsg = r->Add((google::protobuf::Message*)f->prototype);
-  assert(submsg != NULL);
-  return UPB_CONTINUE_WITH(submsg);
-}
-
-#define PROTO2MSG(type, size) { static upb_accessor_vtbl vtbl = { \
-    &proto2_startsubmsg, \
-    &upb_stdmsg_set ## type, \
-    &proto2_startseq, \
-    &proto2_startsubmsg_r, \
-    &proto2_append_ ## type, \
-    NULL, NULL, NULL, NULL, NULL, NULL}; \
-  return &vtbl; }
-
-static upb_accessor_vtbl *proto2_accessor(upb_fielddef *f) {
-  switch (f->type) {
-    case UPB_TYPE(DOUBLE): PROTO2MSG(double, 8)
-    case UPB_TYPE(FLOAT): PROTO2MSG(float, 4)
-    case UPB_TYPE(UINT64):
-    case UPB_TYPE(FIXED64): PROTO2MSG(uint64, 8)
-    case UPB_TYPE(INT64):
-    case UPB_TYPE(SFIXED64):
-    case UPB_TYPE(SINT64): PROTO2MSG(int64, 8)
-    case UPB_TYPE(INT32):
-    case UPB_TYPE(SINT32):
-    case UPB_TYPE(ENUM):
-    case UPB_TYPE(SFIXED32): PROTO2MSG(int32, 4)
-    case UPB_TYPE(UINT32):
-    case UPB_TYPE(FIXED32): PROTO2MSG(uint32, 4)
-    case UPB_TYPE(BOOL): PROTO2MSG(bool, 1)
-    case UPB_TYPE(STRING):
-    case UPB_TYPE(BYTES):
-    case UPB_TYPE(GROUP):
-    case UPB_TYPE(MESSAGE): {
-        static upb_accessor_vtbl vtbl = {
-        &proto2_startsubmsg,
-        &proto2_setstr,
-        &proto2_startseq,
-        &proto2_startsubmsg_r,
-        &proto2_append_str,
-        NULL, NULL, NULL, NULL, NULL, NULL};
-        return &vtbl;
-    }
-  }
-  return NULL;
-}
-
-static void layout_msgdef_from_proto2(upb_msgdef *upb_md,
-                                      const google::protobuf::Message *m,
-                                      const google::protobuf::Descriptor *proto2_d) {
-  // Hack: we break the encapsulation of GeneratedMessageReflection to get at
-  // the offsets we need.  If/when we do this for real, we will need
-  // GeneratedMessageReflection to expose those offsets publicly.
-  const google::protobuf::internal::GeneratedMessageReflection *r =
-      (google::protobuf::internal::GeneratedMessageReflection*)m->GetReflection();
-  for (int i = 0; i < proto2_d->field_count(); i++) {
-    const google::protobuf::FieldDescriptor *proto2_f = proto2_d->field(i);
-    upb_fielddef *upb_f = upb_msgdef_itof(upb_md, proto2_f->number());
-    assert(upb_f);
-
-    // Encapsulation violation BEGIN
-    uint32_t data_offset = r->offsets_[proto2_f->index()];
-    uint32_t hasbit = (r->has_bits_offset_ * 8) + proto2_f->index();
-    // Encapsulation violation END
-
-    if (upb_isseq(upb_f)) {
-      // proto2 does not store hasbits for repeated fields.
-      upb_f->hasbit = -1;
-    } else {
-      upb_f->hasbit = hasbit;
-    }
-    upb_f->offset = data_offset;
-    upb_fielddef_setaccessor(upb_f, proto2_accessor(upb_f));
-
-    if (upb_isstring(upb_f) && !upb_isseq(upb_f)) {
-      upb_f->default_ptr = &r->GetStringReference(*m, proto2_f, NULL);
-    } else if (upb_issubmsg(upb_f)) {
-      // XXX: skip leading "."
-      const google::protobuf::Descriptor *subm_descriptor =
-          google::protobuf::DescriptorPool::generated_pool()->
-              FindMessageTypeByName(upb_fielddef_typename(upb_f) + 1);
-      assert(subm_descriptor);
-      upb_f->prototype = google::protobuf::MessageFactory::generated_factory()->GetPrototype(subm_descriptor);
-      if (!upb_isseq(upb_f))
-        upb_f->default_ptr = &r->GetMessage(*m, proto2_f);
-    }
-  }
-}
+upb::StringSource strsrc;
+upb::Decoder d;
+const upb::MessageDef *def;
+upb::DecoderPlan* plan;

 static bool initialize()
 {
-  // Initialize upb state, decode descriptor.
-  upb_status status = UPB_STATUS_INIT;
-  upb_symtab *s = upb_symtab_new();
-
-  char *data = upb_readfile(MESSAGE_DESCRIPTOR_FILE, &len);
-  if (!data) {
-    fprintf(stderr, "Couldn't read file: " MESSAGE_DESCRIPTOR_FILE);
-    return false;
-  }
-  int n;
-  upb_def **defs = upb_load_defs_from_descriptor(data, len, &n, &status);
-  free(data);
-  if(!upb_ok(&status)) {
-    fprintf(stderr, "Error reading descriptor: %s\n",
-            upb_status_getstr(&status));
-    return false;
-  }
-
-  // Setup offsets and accessors to properly write into a proto2 generated
-  // class.
-  for (int i = 0; i < n; i++) {
-    upb_def *def = defs[i];
-    upb_msgdef *upb_md = upb_dyncast_msgdef(def);
-    if (!upb_md) continue;
-    const google::protobuf::Descriptor *proto2_md =
-        google::protobuf::DescriptorPool::generated_pool()->
-            FindMessageTypeByName(upb_def_fqname(def));
-    if (!proto2_md) abort();
-    const google::protobuf::Message *proto2_m =
-        google::protobuf::MessageFactory::generated_factory()->GetPrototype(proto2_md);
-    layout_msgdef_from_proto2(upb_md, proto2_m, proto2_md);
-  }
-
-  upb_symtab_add(s, defs, n, &status);
-  if(!upb_ok(&status)) {
-    fprintf(stderr, "Error reading adding to symtab: %s\n",
-            upb_status_getstr(&status));
-    return false;
-  }
-  for(int i = 0; i < n; i++) upb_def_unref(defs[i]);
-  free(defs);
-
-  def = upb_dyncast_msgdef_const(upb_symtab_lookup(s, MESSAGE_NAME));
-  if(!def) {
-    fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME);
-    return false;
-  }
-  upb_symtab_unref(s);
-
  // Read the message data itself.
  str = upb_readfile(MESSAGE_FILE, &len);
  if(str == NULL) {
    fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
    return false;
  }
-  upb_status_uninit(&status);
+
+  def = upb::proto2_bridge::NewFinalMessageDef(msg2, &def);

  msg2.ParseFromArray(str, len);

-  upb_stringsrc_init(&strsrc);
-  upb_handlers *h = upb_handlers_new();
-  upb_accessors_reghandlers(h, def);
-  p = upb_decoderplan_new(h, JIT);
-  upb_decoder_init(&d);
-  upb_decoder_resetplan(&d, p, 0);
-  upb_handlers_unref(h);
+  upb::Handlers* h = upb::Handlers::New();
+  upb::RegisterWriteHandlers(h, def);
+  plan = upb::DecoderPlan::New(h, JIT);
+  d.ResetPlan(plan, 0);
+  h->Unref();

  return true;
 }

 static void cleanup() {
-  upb_stringsrc_uninit(&strsrc);
-  upb_decoder_uninit(&d);
-  upb_def_unref(UPB_UPCAST(def));
-  upb_decoderplan_unref(p);
-  free(str);
+  def->Unref(&def);
+  plan->Unref();
 }

-static size_t run(int i)
-{
-  (void)i;
-  upb_status status = UPB_STATUS_INIT;
+static size_t run(int i) {
  msg[i % NUM_MESSAGES].Clear();
-  upb_stringsrc_reset(&strsrc, str, len);
-  upb_decoder_resetinput(
-      &d, upb_stringsrc_allbytes(&strsrc), &msg[i % NUM_MESSAGES]);
-  if (upb_decoder_decode(&d) != UPB_OK) goto err;
+  strsrc.Reset(str, len);
+  d.ResetInput(strsrc.AllBytes(), &msg[i % NUM_MESSAGES]);
+  if (d.Decode() != UPB_OK) goto err;
  return len;

 err:
-  fprintf(stderr, "Decode error: %s", upb_status_getstr(&status));
+  fprintf(stderr, "Decode error: %s", d.status().GetString());
  return 0;
 }
--- a/benchmarks/parsetostruct.upb.c
+++ b/benchmarks/parsetostruct.upb.c
@ -1,85 +0,0 @@
-
-#include "main.c"
-
-#include "upb/bytestream.h"
-#include "upb/def.h"
-#include "upb/msg.h"
-#include "upb/pb/decoder.h"
-#include "upb/pb/glue.h"
-
-static const upb_msgdef *def;
-static size_t len;
-static void *msg[NUM_MESSAGES];
-static upb_stringsrc strsrc;
-static upb_decoder d;
-static upb_decoderplan *p;
-char *str;
-
-static bool initialize()
-{
-  // Initialize upb state, decode descriptor.
-  upb_status status = UPB_STATUS_INIT;
-  upb_symtab *s = upb_symtab_new();
-  upb_load_descriptor_file_into_symtab(s, MESSAGE_DESCRIPTOR_FILE, &status);
-  if(!upb_ok(&status)) {
-    fprintf(stderr, "Error reading descriptor: %s\n",
-            upb_status_getstr(&status));
-    return false;
-  }
-
-  def = upb_dyncast_msgdef_const(upb_symtab_lookup(s, MESSAGE_NAME));
-  if(!def) {
-    fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME);
-    return false;
-  }
-  upb_symtab_unref(s);
-
-  // Read the message data itself.
-  str = upb_readfile(MESSAGE_FILE, &len);
-  if(str == NULL) {
-    fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
-    return false;
-  }
-  upb_status_uninit(&status);
-  for (int i = 0; i < NUM_MESSAGES; i++)
-    msg[i] = upb_stdmsg_new(def);
-
-  upb_stringsrc_init(&strsrc);
-  upb_handlers *h = upb_handlers_new();
-  upb_accessors_reghandlers(h, def);
-  p = upb_decoderplan_new(h, JIT);
-  upb_decoder_init(&d);
-  upb_handlers_unref(h);
-  upb_decoder_resetplan(&d, p, 0);
-
-  if (!BYREF) {
-    // TODO: use byref/byval accessors.
-  }
-  return true;
-}
-
-static void cleanup()
-{
-  for (int i = 0; i < NUM_MESSAGES; i++)
-    upb_stdmsg_free(msg[i], def);
-  upb_def_unref(UPB_UPCAST(def));
-  upb_stringsrc_uninit(&strsrc);
-  upb_decoder_uninit(&d);
-  upb_decoderplan_unref(p);
-  free(str);
-}
-
-static size_t run(int i)
-{
-  upb_status status = UPB_STATUS_INIT;
-  i %= NUM_MESSAGES;
-  upb_msg_clear(msg[i], def);
-  upb_stringsrc_reset(&strsrc, str, len);
-  upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), msg[i]);
-  if (upb_decoder_decode(&d) != UPB_OK) goto err;
-  return len;
-
-err:
-  fprintf(stderr, "Decode error: %s", upb_status_getstr(&status));
-  return 0;
-}
--- a/bindings/cpp/upb/bytestream.hpp
+++ b/bindings/cpp/upb/bytestream.hpp
@ -68,6 +68,7 @@

 #include "upb/bytestream.h"
 #include "upb/upb.hpp"
+#include <string>

 namespace upb {

@ -204,6 +205,18 @@ class ByteRegion : public upb_byteregion {
    return upb_byteregion_strdup(this);
  }

+  template <typename T> void AssignToString(T* str) {
+    uint64_t ofs = start_ofs();
+    str->clear();
+    str->reserve(Length());
+    while (ofs < end_ofs()) {
+      size_t len;
+      const char *ptr = GetPtr(ofs, &len);
+      str->append(ptr, len);
+      ofs += len;
+    }
+  }
+
  // TODO: add if/when there is a demonstrated need.
  //
  // // Pins this byteregion's bytes in memory, allowing it to outlive its
@ -220,12 +233,24 @@ class ByteRegion : public upb_byteregion {
 class StringSource : public upb_stringsrc {
 public:
  StringSource() : upb_stringsrc() { upb_stringsrc_init(this); }
+  template <typename T> explicit StringSource(const T& str) {
+    upb_stringsrc_init(this);
+    Reset(str);
+  }
+  StringSource(const char *data, size_t len) {
+    upb_stringsrc_init(this);
+    Reset(data, len);
+  }
  ~StringSource() { upb_stringsrc_uninit(this); }

  void Reset(const char* data, size_t len) {
    upb_stringsrc_reset(this, data, len);
  }

+  template <typename T> void Reset(const T& str) {
+    Reset(str.c_str(), str.size());
+  }
+
  ByteRegion* AllBytes() {
    return static_cast<ByteRegion*>(upb_stringsrc_allbytes(this));
  }
@ -233,6 +258,14 @@ class StringSource : public upb_stringsrc {
  upb_bytesrc* ByteSource() { return upb_stringsrc_bytesrc(this); }
 };

+template <> inline ByteRegion* GetValue<ByteRegion*>(Value v) {
+  return static_cast<ByteRegion*>(upb_value_getbyteregion(v));
+}
+
+template <> inline Value MakeValue<ByteRegion*>(ByteRegion* v) {
+  return upb_value_byteregion(v);
+}
+
 }  // namespace upb

 #endif
--- a/bindings/cpp/upb/def.hpp
+++ b/bindings/cpp/upb/def.hpp
@ -1,7 +1,7 @@
 //
 // upb - a minimalist implementation of protocol buffers.
 //
-// Copyright (c) 2011 Google Inc.  See LICENSE for details.
+// Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
 // Author: Josh Haberman <jhaberman@gmail.com>
 //
 // The set of upb::*Def classes and upb::SymbolTable allow for defining and
@ -15,21 +15,20 @@
 //    not be used for any purpose except to set its properties (it can't be
 //    used to parse anything, create any messages in memory, etc).
 //
-// 2. FINALIZED: after being added to a symtab (which links the defs together)
-//    the defs become finalized (thread-safe and immutable).  Programs may only
-//    access defs through a CONST POINTER during this stage -- upb_symtab will
-//    help you out with this requirement by only vending const pointers, but
-//    you need to make sure not to use any non-const pointers you still have
-//    sitting around.  In practice this means that you may not call any setters
-//    on the defs (or functions that themselves call the setters).  If you want
-//    to modify an existing immutable def, copy it with upb_*_dup(), modify the
-//    copy, and add the modified def to the symtab (replacing the existing
-//    def).
+// 2. FINALIZED: the Def::Finalize() operation finalizes a set of defs,
+//    which makes them thread-safe and immutable.  Finalized defs may only be
+//    accessed through a CONST POINTER.  If you want to modify an existing
+//    immutable def, copy it with Dup() and modify and finalize the copy.
 //
-// You can test for which stage of life a def is in by calling
-// upb::Def::IsMutable().  This is particularly useful for dynamic language
-// bindings, which must properly guarantee that the dynamic language cannot
-// break the rules laid out above.
+// The refcounting of defs works properly no matter what state the def is in.
+// Once the def is finalized it is guaranteed that any def reachable from a
+// live def is also live (so a ref on the base of a message tree keeps the
+// whole tree alive).
+//
+// You can test for which stage of life a def is in by calling IsMutable().
+// This is particularly useful for dynamic language bindings, which must
+// properly guarantee that the dynamic language cannot break the rules laid out
+// above.
 //
 // It would be possible to make the defs thread-safe during stage 1 by using
 // mutexes internally and changing any methods returning pointers to return
@ -48,63 +47,213 @@

 namespace upb {

+class Def;
 class MessageDef;

+typedef upb_fieldtype_t FieldType;
+typedef upb_label_t Label;
+
 class FieldDef : public upb_fielddef {
 public:
-  static FieldDef* Cast(upb_fielddef *f) { return (FieldDef*)f; }
-  static const FieldDef* Cast(const upb_fielddef *f) { return (FieldDef*)f; }
+  static FieldDef* Cast(upb_fielddef *f) { return static_cast<FieldDef*>(f); }
+  static const FieldDef* Cast(const upb_fielddef *f) {
+    return static_cast<const FieldDef*>(f);
+  }
+
+  static FieldDef* New(void *owner) { return Cast(upb_fielddef_new(owner)); }
+  FieldDef* Dup(void *owner) const {
+    return Cast(upb_fielddef_dup(this, owner));
+  }
+  void Ref(void *owner) { upb_fielddef_ref(this, owner); }
+  void Unref(void *owner) { upb_fielddef_unref(this, owner); }

-  static FieldDef* New() { return Cast(upb_fielddef_new()); }
-  FieldDef* Dup() { return Cast(upb_fielddef_dup(this)); }
+  bool IsMutable() const { return upb_fielddef_ismutable(this); }
+  bool IsFinalized() const { return upb_fielddef_isfinalized(this); }
+  bool IsString() const { return upb_isstring(this); }
+  bool IsSequence() const { return upb_isseq(this); }
+  bool IsSubmessage() const { return upb_issubmsg(this); }

-  // Read accessors -- may be called at any time.
-  uint8_t type() const { return upb_fielddef_type(this); }
-  uint8_t label() const { return upb_fielddef_label(this); }
+  // Simple accessors. /////////////////////////////////////////////////////////
+
+  FieldType type() const { return upb_fielddef_type(this); }
+  Label label() const { return upb_fielddef_label(this); }
  int32_t number() const { return upb_fielddef_number(this); }
  std::string name() const { return std::string(upb_fielddef_name(this)); }
  Value default_() const { return upb_fielddef_default(this); }
  Value bound_value() const { return upb_fielddef_fval(this); }
+  uint16_t offset() const { return upb_fielddef_offset(this); }
+  int16_t hasbit() const { return upb_fielddef_hasbit(this); }
+
+  bool set_type(FieldType type) { return upb_fielddef_settype(this, type); }
+  bool set_label(Label label) { return upb_fielddef_setlabel(this, label); }
+  void set_offset(uint16_t offset) { upb_fielddef_setoffset(this, offset); }
+  void set_hasbit(int16_t hasbit) { upb_fielddef_sethasbit(this, hasbit); }
+  void set_fval(Value fval) { upb_fielddef_setfval(this, fval); }
+  void set_accessor(struct _upb_accessor_vtbl* vtbl) {
+    upb_fielddef_setaccessor(this, vtbl);
+  }
+  MessageDef* message();
+  const MessageDef* message() const;

-  MessageDef* message() { return (MessageDef*)upb_fielddef_msgdef(this); }
-  const MessageDef* message() const { return (MessageDef*)upb_fielddef_msgdef(this); }
-
-  // Will be added once upb::Def is defined:
-  // Def* subdef() { return upb_fielddef_subdef(this); }
-  // const Def* subdef() { return upb_fielddef_subdef(this); }
-
-  // Returns true if this FieldDef is finalized
-  bool IsFinalized() const { return upb_fielddef_finalized(this); }
  struct _upb_accessor_vtbl *accessor() const {
    return upb_fielddef_accessor(this);
  }
-  std::string type_name() const {
-    return std::string(upb_fielddef_typename(this));
+
+  // "Number" and "name" must be set before the fielddef is added to a msgdef.
+  // For the moment we do not allow these to be set once the fielddef is added
+  // to a msgdef -- this could be relaxed in the future.
+  bool set_number(int32_t number) {
+    return upb_fielddef_setnumber(this, number);
+  }
+  bool set_name(const char *name) { return upb_fielddef_setname(this, name); }
+  bool set_name(const std::string& name) { return set_name(name.c_str()); }
+
+  // Default value. ////////////////////////////////////////////////////////////
+
+  // Returns the default value for this fielddef, which may either be something
+  // the client set explicitly or the "default default" (0 for numbers, empty
+  // for strings).  The field's type indicates the type of the returned value,
+  // except for enum fields that are still mutable.
+  //
+  // For enums the default can be set either numerically or symbolically -- the
+  // DefaultIsSymbolic() method below will indicate which it
+  // is.  For string defaults, the value will be a upb_byteregion which is
+  // invalidated by any other non-const call on this object.  Once the fielddef
+  // is finalized, symbolic enum defaults are resolved, so finalized enum
+  // fielddefs always have a default of type int32.
+  Value defaultval() { return upb_fielddef_default(this); }
+
+  // Sets default value for the field.  For numeric types, use
+  // upb_fielddef_setdefault(), and "value" must match the type of the field.
+  // For string/bytes types, use upb_fielddef_setdefaultstr().  Enum types may
+  // use either, since the default may be set either numerically or
+  // symbolically.
+  //
+  // NOTE: May only be called for fields whose type has already been set.
+  // Also, will be reset to default if the field's type is set again.
+  void set_default(Value value) { upb_fielddef_setdefault(this, value); }
+  void set_default(const char *str) { upb_fielddef_setdefaultcstr(this, str); }
+  void set_default(const char *str, size_t len) {
+    upb_fielddef_setdefaultstr(this, str, len);
+  }
+  void set_default(const std::string& str) {
+    upb_fielddef_setdefaultstr(this, str.c_str(), str.size());
+  }
+
+  // The results of this function are only meaningful for mutable enum fields,
+  // which can have a default specified either as an integer or as a string.
+  // If this returns true, the default returned from upb_fielddef_default() is
+  // a string, otherwise it is an integer.
+  bool DefaultIsSymbolic() { return upb_fielddef_default_is_symbolic(this); }
+
+  // Subdef. ///////////////////////////////////////////////////////////////////
+
+  // Submessage and enum fields must reference a "subdef", which is the
+  // MessageDef or EnumDef that defines their type.  Note that when the
+  // FieldDef is mutable it may not have a subdef *yet*, but this still returns
+  // true to indicate that the field's type requires a subdef.
+  bool HasSubDef() { return upb_hassubdef(this); }
+
+  // Before a FieldDef is finalized, its subdef may be set either directly
+  // (with a Def*) or symbolically.  Symbolic refs must be resolved by the
+  // client before the containing msgdef can be finalized.
+  //
+  // Both methods require HasSubDef() to be true (so the type must be set
+  // before calling them).  Returns false if this is not the case, or if the
+  // given subdef is not of the correct type.  The subtype is reset if the
+  // field's type is changed.
+  bool set_subdef(Def* def);
+  bool set_subtype_name(const char *name) {
+    return upb_fielddef_setsubtypename(this, name);
+  }
+  bool set_subtype_name(const std::string& str) {
+    return set_subtype_name(str.c_str());
  }

-  // Write accessors -- may not be called once the FieldDef is finalized.
+  // Returns the enum or submessage def or symbolic name for this field, if
+  // any.  May only be called for fields where HasSubDef() is true.  Returns
+  // NULL if the subdef has not been set or if you ask for a subtype name when
+  // the subtype is currently set symbolically (or vice-versa).
+  //
+  // Caller does *not* own a ref on the returned def or string.
+  // subtype_name() is non-const because only mutable defs can have the
+  // subtype name set symbolically (symbolic references must be resolved before
+  // the MessageDef can be finalized).
+  const Def* subdef() const;
+  const char *subtype_name() { return upb_fielddef_subtypename(this); }

 private:
-  FieldDef();
-  ~FieldDef();
+  UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(FieldDef);
+};
+
+// C++ wrapper over upb_def.  Adds no data members; every method forwards to
+// the corresponding upb_def_* C function on "this".
+class Def : public upb_def {
+ public:
+  // Converting from C types to C++ wrapper types.
+  static Def* Cast(upb_def *def) { return static_cast<Def*>(def); }
+  static const Def* Cast(const upb_def *def) {
+    return static_cast<const Def*>(def);
+  }
+
+  // Take / release a ref on this def; "owner" is forwarded unchanged to
+  // upb_def_ref() / upb_def_unref().
+  void Ref(void *owner) const { upb_def_ref(this, owner); }
+  void Unref(void *owner) const { upb_def_unref(this, owner); }
+
+  // Sets the def's full name (forwards to upb_def_setfullname()).
+  void set_full_name(const char *name) { upb_def_setfullname(this, name); }
+  void set_full_name(const std::string& name) {
+    upb_def_setfullname(this, name.c_str());
+  }
+
+  // Returns the def's full name as reported by upb_def_fullname().
+  const char *full_name() const { return upb_def_fullname(this); }
+
+  // Finalizes the given list of defs (as well as the fielddefs for the given
+  // msgdefs).  All defs reachable from any def in this list must either be
+  // already finalized or elsewhere in the list.  Any symbolic references to
+  // enums or submessages must already have been resolved.  Returns true on
+  // success, otherwise false is returned and status contains details.  In the
+  // error case the input defs are unmodified.  See the comment at the top of
+  // this file for the semantics of finalized defs.
+  //
+  // n is currently limited to 64k defs, if more are required break them into
+  // batches of 64k (or we could raise this limit, at the cost of a bigger
+  // upb_def structure or complexity in upb_def_finalize()).
+  static bool Finalize(Def*const* defs, int n, Status* status) {
+    return upb_finalize(reinterpret_cast<upb_def*const*>(defs), n, status);
+  }
+  // Convenience overload taking a vector.  An empty vector is passed through
+  // as (NULL, 0): indexing element 0 of an empty vector is undefined.
+  // NOTE(review): assumes upb_finalize() does not dereference defs when n == 0.
+  static bool Finalize(const std::vector<Def*>& defs, Status* status) {
+    return Finalize(defs.empty() ? NULL : &defs[0], defs.size(), status);
+  }
 };

 class MessageDef : public upb_msgdef {
 public:
  // Converting from C types to C++ wrapper types.
-  static MessageDef* Cast(upb_msgdef *md) { return (MessageDef*)md; }
+  static MessageDef* Cast(upb_msgdef *md) {
+    return static_cast<MessageDef*>(md);
+  }
  static const MessageDef* Cast(const upb_msgdef *md) {
-    return (MessageDef*)md;
+    return static_cast<const MessageDef*>(md);
+  }
+  static MessageDef* DynamicCast(Def* def) {
+    return Cast(upb_dyncast_msgdef(def));
+  }
+  static const MessageDef* DynamicCast(const Def* def) {
+    return Cast(upb_dyncast_msgdef_const(def));
  }

-  static MessageDef* New() { return Cast(upb_msgdef_new()); }
-  MessageDef* Dup() { return Cast(upb_msgdef_dup(this)); }
+  Def* AsDef() { return Def::Cast(UPB_UPCAST(this)); }
+  const Def* AsDef() const { return Def::Cast(UPB_UPCAST(this)); }
+
+  static MessageDef* New(void *owner) { return Cast(upb_msgdef_new(owner)); }
+  MessageDef* Dup(void *owner) const {
+    return Cast(upb_msgdef_dup(this, owner));
+  }

-  void Ref() const { upb_msgdef_ref(this); }
-  void Unref() const { upb_msgdef_unref(this); }
+  void Ref(void *owner) const { upb_msgdef_ref(this, owner); }
+  void Unref(void *owner) const { upb_msgdef_unref(this, owner); }

  // Read accessors -- may be called at any time.

+  const char *full_name() const { return AsDef()->full_name(); }
+
  // The total size of in-memory messages created with this MessageDef.
  uint16_t instance_size() const { return upb_msgdef_size(this); }

@ -116,25 +265,32 @@ class MessageDef : public upb_msgdef {

  // Write accessors.  May only be called before the msgdef is in a symtab.

+  void set_full_name(const char *name) { AsDef()->set_full_name(name); }
+  void set_full_name(const std::string& name) { AsDef()->set_full_name(name); }
+
  void set_instance_size(uint16_t size) { upb_msgdef_setsize(this, size); }
  void set_hasbit_bytes(uint16_t size) { upb_msgdef_setsize(this, size); }
  bool SetExtensionRange(uint32_t start, uint32_t end) {
    return upb_msgdef_setextrange(this, start, end);
  }

-  // Adds a set of fields (upb_fielddef objects) to a msgdef.  Caller retains
-  // its ref on the fielddef.  May only be done before the msgdef is in a
-  // symtab (requires upb_def_ismutable(m) for the msgdef).  The fielddef's
-  // name and number must be set, and the message may not already contain any
-  // field with this name or number, and this fielddef may not be part of
-  // another message, otherwise false is returned and no action is performed.
-  bool AddFields(FieldDef*const * f, int n) {
-    return upb_msgdef_addfields(this, (upb_fielddef**)f, n);
+  // Adds a set of fields (FieldDef objects) to a MessageDef.  Caller passes a
+  // ref on the FieldDef to the MessageDef in both success and failure cases.
+  // May only be done before the MessageDef is in a SymbolTable (requires
+  // m->IsMutable() for the MessageDef).  The FieldDef's name and number must
+  // be set, and the message may not already contain any field with this name
+  // or number, and this FieldDef may not be part of another message, otherwise
+  // false is returned and the MessageDef is unchanged.
+  bool AddField(FieldDef* f, void *owner) { return AddFields(&f, 1, owner); }
+  bool AddFields(FieldDef*const * f, int n, void *owner) {
+    return upb_msgdef_addfields(this, (upb_fielddef*const*)f, n, owner);
  }
-  bool AddFields(const std::vector<FieldDef*>& fields) {
-    return AddFields(&fields[0], fields.size());
+  // Vector convenience overload.  An empty vector is forwarded as (NULL, 0)
+  // because taking &fields[0] on an empty vector is undefined behavior.
+  // NOTE(review): assumes upb_msgdef_addfields() ignores the pointer when n == 0.
+  bool AddFields(const std::vector<FieldDef*>& fields, void *owner) {
+    return AddFields(fields.empty() ? NULL : &fields[0], fields.size(), owner);
   }

+  int field_count() const { return upb_msgdef_numfields(this); }
+
  // Lookup fields by name or number, returning NULL if no such field exists.
  FieldDef* FindFieldByName(const char *name) {
    return FieldDef::Cast(upb_msgdef_ntof(this, name));
@ -156,19 +312,89 @@ class MessageDef : public upb_msgdef {
    return FindFieldByNumber(num);
  }

-  // TODO: iteration over fields.
+  class Iterator : public upb_msg_iter {
+   public:
+    explicit Iterator(MessageDef* md) { upb_msg_begin(this, md); }
+    Iterator() {}
+
+    FieldDef* field() { return FieldDef::Cast(upb_msg_iter_field(this)); }
+    bool Done() { return upb_msg_done(this); }
+    void Next() { return upb_msg_next(this); }
+  };
+
+  class ConstIterator : public upb_msg_iter {
+   public:
+    explicit ConstIterator(const MessageDef* md) { upb_msg_begin(this, md); }
+    ConstIterator() {}
+
+    const FieldDef* field() { return FieldDef::Cast(upb_msg_iter_field(this)); }
+    bool Done() { return upb_msg_done(this); }
+    void Next() { return upb_msg_next(this); }
+  };

 private:
-  MessageDef();
-  ~MessageDef();
+  UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(MessageDef);
+};
+
+// C++ wrapper over upb_enumdef.  Adds no data members; every method forwards
+// to the corresponding upb_enumdef_* C function on "this".
+class EnumDef : public upb_enumdef {
+ public:
+  // Converting from C types to C++ wrapper types.
+  static EnumDef* Cast(upb_enumdef *e) { return static_cast<EnumDef*>(e); }
+  static const EnumDef* Cast(const upb_enumdef *e) {
+    return static_cast<const EnumDef*>(e);
+  }
+
+  // Creates a new enumdef; "owner" is forwarded to upb_enumdef_new().
+  static EnumDef* New(void *owner) { return Cast(upb_enumdef_new(owner)); }
+
+  // Ref / Unref / Dup forward "owner" unchanged to the C API.
+  void Ref(void *owner) { upb_enumdef_ref(this, owner); }
+  void Unref(void *owner) { upb_enumdef_unref(this, owner); }
+  EnumDef* Dup(void *owner) const { return Cast(upb_enumdef_dup(this, owner)); }
+
+  // Upcasts to the generic Def wrapper.
+  Def* AsDef() { return Def::Cast(UPB_UPCAST(this)); }
+  const Def* AsDef() const { return Def::Cast(UPB_UPCAST(this)); }
+
+  int32_t default_value() const { return upb_enumdef_default(this); }
+
+  // May only be set if IsMutable().
+  void set_full_name(const char *name) { AsDef()->set_full_name(name); }
+  void set_full_name(const std::string& name) { AsDef()->set_full_name(name); }
+  void set_default_value(int32_t val) {
+    return upb_enumdef_setdefault(this, val);
+  }
+
+  // Adds a value to the enumdef.  Requires that no existing val has this
+  // name or number (returns false and does not add if there is).  May only
+  // be called if IsMutable().
+  //
+  // The name is taken as const char* so that string literals and const
+  // buffers bind to this overload directly; callers passing a plain char*
+  // are unaffected.
+  bool AddValue(const char *name, int32_t num) {
+    return upb_enumdef_addval(this, name, num);
+  }
+  bool AddValue(const std::string& name, int32_t num) {
+    return upb_enumdef_addval(this, name.c_str(), num);
+  }
+
+  // Lookup from name to integer: returns the result of upb_enumdef_ntoi(),
+  // which writes the number through "num".
+  bool LookupName(const char *name, int32_t* num) const {
+    return upb_enumdef_ntoi(this, name, num);
+  }
+
+  // Lookup from integer to name, returns a NULL-terminated string which
+  // the caller does not own, or NULL if not found.
+  const char *LookupNumber(int32_t num) const {
+    return upb_enumdef_iton(this, num);
+  }
+
+ private:
+  UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(EnumDef);
 };

 class SymbolTable : public upb_symtab {
 public:
  // Converting from C types to C++ wrapper types.
-  static SymbolTable* Cast(upb_symtab *s) { return (SymbolTable*)s; }
+  static SymbolTable* Cast(upb_symtab *s) {
+    return static_cast<SymbolTable*>(s);
+  }
  static const SymbolTable* Cast(const upb_symtab *s) {
-    return (SymbolTable*)s;
+    return static_cast<const SymbolTable*>(s);
  }

  static SymbolTable* New() { return Cast(upb_symtab_new()); }
@ -176,17 +402,50 @@ class SymbolTable : public upb_symtab {
  void Ref() const { upb_symtab_unref(this); }
  void Unref() const { upb_symtab_unref(this); }

+  // Adds the given defs to the symtab, resolving all symbols.  Only one def
+  // per name may be in the list, but defs can replace existing defs in the
+  // symtab.  The entire operation either succeeds or fails.  If the operation
+  // fails, the symtab is unchanged, false is returned, and status indicates
+  // the error.  The caller passes a ref on the defs in all cases.
+  bool Add(Def *const *defs, int n, void *owner, Status* status) {
+    return upb_symtab_add(this, (upb_def*const*)defs, n, owner, status);
+  }
+  // Vector convenience overload.  An empty vector is forwarded as (NULL, 0)
+  // because taking &defs[0] on an empty vector is undefined behavior.
+  // NOTE(review): assumes upb_symtab_add() ignores the pointer when n == 0.
+  bool Add(const std::vector<Def*>& defs, void *owner, Status* status) {
+    return Add(defs.empty() ? NULL : &defs[0], defs.size(), owner, status);
+  }
+
  // If the given name refers to a message in this symbol table, returns a new
  // ref to that MessageDef object, otherwise returns NULL.
-  const MessageDef* LookupMessage(const char *name) const {
-    return MessageDef::Cast(upb_symtab_lookupmsg(this, name));
+  const MessageDef* LookupMessage(const char *name, void *owner) const {
+    return MessageDef::Cast(upb_symtab_lookupmsg(this, name, owner));
  }

 private:
-  SymbolTable();
-  ~SymbolTable();
+  UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(SymbolTable);
 };

+template <> inline const FieldDef* GetValue<const FieldDef*>(Value v) {
+  return static_cast<const FieldDef*>(upb_value_getfielddef(v));
+}
+
+template <> inline Value MakeValue<FieldDef*>(FieldDef* v) {
+  return upb_value_fielddef(v);
+}
+
+inline MessageDef* FieldDef::message() {
+  return MessageDef::Cast(upb_fielddef_msgdef(this));
+}
+inline const MessageDef* FieldDef::message() const {
+  return MessageDef::Cast(upb_fielddef_msgdef(this));
+}
+
+inline const Def* FieldDef::subdef() const {
+  return Def::Cast(upb_fielddef_subdef(this));
+}
+inline bool FieldDef::set_subdef(Def* def) {
+  return upb_fielddef_setsubdef(this, def);
+}
+
 }  // namespace upb

 #endif
--- a/bindings/cpp/upb/handlers.cc
+++ b/bindings/cpp/upb/handlers.cc
@ -0,0 +1,39 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+
+#include "handlers.hpp"
+
+#include "def.hpp"
+
+namespace upb {
+
+namespace {
+
+void MessageCallbackWrapper(
+    void* closure, upb_mhandlers* mh, const upb_msgdef* m) {
+  Handlers::MessageRegistrationVisitor* visitor =
+      static_cast<Handlers::MessageRegistrationVisitor*>(closure);
+  visitor->OnMessage(static_cast<MessageHandlers*>(mh),
+                     static_cast<const MessageDef*>(m));
+}
+
+void FieldCallbackWrapper(
+    void* closure, upb_fhandlers* fh, const upb_fielddef* f) {
+  Handlers::MessageRegistrationVisitor* visitor =
+      static_cast<Handlers::MessageRegistrationVisitor*>(closure);
+  visitor->OnField(static_cast<FieldHandlers*>(fh),
+                   static_cast<const FieldDef*>(f));
+}
+}  // namespace
+
+// Registers handlers for "m" via upb_handlers_regmsgdef(), routing the
+// per-message and per-field callbacks to "visitor" through
+// MessageCallbackWrapper / FieldCallbackWrapper.  Returns the MessageHandlers
+// that upb_handlers_regmsgdef() hands back.
+MessageHandlers* Handlers::RegisterMessageDef(
+    const MessageDef& m, Handlers::MessageRegistrationVisitor* visitor) {
+  // The closure must be the visitor pointer itself (not its address): both
+  // wrappers static_cast the closure straight to MessageRegistrationVisitor*.
+  upb_mhandlers* mh = upb_handlers_regmsgdef(
+      this, &m, &MessageCallbackWrapper, &FieldCallbackWrapper, visitor);
+  return static_cast<MessageHandlers*>(mh);
+}
+
+}  // namespace upb
--- a/bindings/cpp/upb/handlers.hpp
+++ b/bindings/cpp/upb/handlers.hpp
@ -15,11 +15,16 @@

 #include "upb/handlers.h"

+#include "upb/upb.hpp"
+
 namespace upb {

 typedef upb_fieldtype_t FieldType;
 typedef upb_flow_t Flow;
+typedef upb_sflow_t SubFlow;
 class MessageHandlers;
+class MessageDef;
+class FieldDef;

 class FieldHandlers : public upb_fhandlers {
 public:
@ -68,12 +73,11 @@ class FieldHandlers : public upb_fhandlers {
  MessageHandlers* GetSubMessageHandlers() const;
  // If set to >=0, the given hasbit will be set after the value callback is
  // called (offset relative to the current closure).
-  int32_t GetValueHasbit() const { return upb_fhandlers_getvaluehasbit(this); }
-  void SetValueHasbit(int32_t bit) { upb_fhandlers_setvaluehasbit(this, bit); }
+  int32_t GetHasbit() const { return upb_fhandlers_gethasbit(this); }
+  void SetHasbit(int32_t bit) { upb_fhandlers_sethasbit(this, bit); }

 private:
-  FieldHandlers();  // Only created by upb::Handlers.
-  ~FieldHandlers(); // Only destroyed by refcounting.
+  UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(FieldHandlers);
 };

 class MessageHandlers : public upb_mhandlers {
@ -81,6 +85,13 @@ class MessageHandlers : public upb_mhandlers {
  typedef upb_startmsg_handler StartMessageHandler;
  typedef upb_endmsg_handler EndMessageHandler;

+  static MessageHandlers* Cast(upb_mhandlers* mh) {
+    return static_cast<MessageHandlers*>(mh);
+  }
+  static const MessageHandlers* Cast(const upb_mhandlers* mh) {
+    return static_cast<const MessageHandlers*>(mh);
+  }
+
  // The MessageHandlers will live at least as long as the upb::Handlers to
  // which it belongs, but can be Ref'd/Unref'd to make it live longer (which
  // will prolong the life of the underlying upb::Handlers also).
@ -89,7 +100,7 @@ class MessageHandlers : public upb_mhandlers {

  // Functions to set this message's handlers.
  // These return "this" so they can be conveniently chained, eg.
-  //   handlers->NewMessage()
+  //   handlers->NewMessageHandlers()
  //       ->SetStartMessageHandler(&StartMessage)
  //       ->SetEndMessageHandler(&EndMessage);
  MessageHandlers* SetStartMessageHandler(StartMessageHandler* h) {
@ -111,13 +122,13 @@ class MessageHandlers : public upb_mhandlers {
  FieldHandlers* NewFieldHandlersForSubmessage(uint32_t n, const char *name,
                                               FieldType type, bool repeated,
                                               MessageHandlers* subm) {
+    (void)name;
    return static_cast<FieldHandlers*>(
        upb_mhandlers_newfhandlers_subm(this, n, type, repeated, subm));
  }

 private:
-  MessageHandlers();  // Only created by upb::Handlers.
-  ~MessageHandlers(); // Only destroyed by refcounting.
+  UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(MessageHandlers);
 };

 class Handlers : public upb_handlers {
@ -134,17 +145,29 @@ class Handlers : public upb_handlers {
    return static_cast<MessageHandlers*>(upb_handlers_newmhandlers(this));
  }

+  // Convenience function for registering handlers for all messages and fields
+  // in a MessageDef and all its children.  For every registered message,
+  // OnMessage will be called on the visitor with newly-created MessageHandlers
+  // and MessageDef.  Likewise, OnField will be called with newly-created
+  // FieldHandlers and FieldDef for each field.
+  class MessageRegistrationVisitor {
+   public:
+    virtual ~MessageRegistrationVisitor() {}
+    virtual void OnMessage(MessageHandlers* mh, const MessageDef* m) = 0;
+    virtual void OnField(FieldHandlers* fh, const FieldDef* f) = 0;
+  };
+  MessageHandlers* RegisterMessageDef(const MessageDef& m,
+                                      MessageRegistrationVisitor* visitor);
+
 private:
-  Handlers();  // Only created by Handlers::New().
-  ~Handlers(); // Only destroyed by refcounting.
+  UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(Handlers);
 };

-
-MessageHandlers* FieldHandlers::GetMessageHandlers() const {
+inline MessageHandlers* FieldHandlers::GetMessageHandlers() const {
  return static_cast<MessageHandlers*>(upb_fhandlers_getmsg(this));
 }

-MessageHandlers* FieldHandlers::GetSubMessageHandlers() const {
+inline MessageHandlers* FieldHandlers::GetSubMessageHandlers() const {
  return static_cast<MessageHandlers*>(upb_fhandlers_getsubmsg(this));
 }

--- a/bindings/cpp/upb/msg.hpp
+++ b/bindings/cpp/upb/msg.hpp
@ -0,0 +1,62 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+// Routines for reading and writing message data to an in-memory structure,
+// similar to a C struct.
+//
+// upb does not define one single message object that everyone must use.
+// Rather it defines an abstract interface for reading and writing members
+// of a message object, and all of the parsers and serializers use this
+// abstract interface.  This allows upb's parsers and serializers to be used
+// regardless of what memory management scheme or synchronization model the
+// application is using.
+//
+// A standard set of accessors is provided for doing simple reads and writes at
+// a known offset into the message.  These accessors should be used when
+// possible, because they are specially optimized -- for example, the JIT can
+// recognize them and emit specialized code instead of having to call the
+// function at all.  The application can substitute its own accessors when the
+// standard accessors are not suitable.
+
+#ifndef UPB_MSG_HPP
+#define UPB_MSG_HPP
+
+#include "upb/msg.h"
+#include "upb/handlers.hpp"
+
+namespace upb {
+
+typedef upb_accessor_vtbl AccessorVTable;
+
+// Registers handlers for writing into a message of the given type using
+// whatever accessors it has defined.
+inline MessageHandlers* RegisterWriteHandlers(upb::Handlers* handlers,
+                                              const upb::MessageDef* md) {
+  return MessageHandlers::Cast(
+      upb_accessors_reghandlers(handlers, md));
+}
+
+template <typename T> static FieldHandlers::ValueHandler* GetValueHandler();
+
+// A handy templated function that will retrieve a value handler for a given
+// C++ type.
+#define GET_VALUE_HANDLER(type, ctype) \
+    template <> \
+    FieldHandlers::ValueHandler* GetValueHandler<ctype>() { \
+      return &upb_stdmsg_set ## type; \
+    }
+
+GET_VALUE_HANDLER(double, double);
+GET_VALUE_HANDLER(float, float);
+GET_VALUE_HANDLER(uint64, uint64_t);
+GET_VALUE_HANDLER(uint32, uint32_t);
+GET_VALUE_HANDLER(int64, int64_t);
+GET_VALUE_HANDLER(int32, int32_t);
+GET_VALUE_HANDLER(bool, bool);
+#undef GET_VALUE_HANDLER
+
+}  // namespace upb
+
+#endif
--- a/bindings/cpp/upb/pb/glue.hpp
+++ b/bindings/cpp/upb/pb/glue.hpp
@ -13,11 +13,23 @@

 namespace upb {

+// All routines that load descriptors expect the descriptor to be a
+// FileDescriptorSet.
 bool LoadDescriptorFileIntoSymtab(SymbolTable* s, const char *fname,
                                  Status* status) {
  return upb_load_descriptor_file_into_symtab(s, fname, status);
 }

+// Loads a descriptor held in memory (str, len) into the symtab by forwarding
+// to upb_load_descriptor_into_symtab().  Declared inline because it is
+// defined in a header: without it, every translation unit that includes this
+// file emits its own external definition and the link fails.
+inline bool LoadDescriptorIntoSymtab(SymbolTable* s, const char* str,
+                                     size_t len, Status* status) {
+  return upb_load_descriptor_into_symtab(s, str, len, status);
+}
+
+template <typename T>
+bool LoadDescriptorIntoSymtab(SymbolTable* s, const T& desc, Status* status) {
+  return upb_load_descriptor_into_symtab(s, desc.c_str(), desc.size(), status);
+}
+
 }  // namespace upb

 #endif
--- a/bindings/cpp/upb/proto2_bridge.cc
+++ b/bindings/cpp/upb/proto2_bridge.cc
@ -0,0 +1,892 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+
+#include <cassert>
+#include <map>
+#include <string>
+#include <typeinfo>
+#include <vector>
+#include "upb/bytestream.hpp"
+#include "upb/def.hpp"
+#include "upb/handlers.hpp"
+#include "upb/msg.hpp"
+#include "upb/proto2_bridge.hpp"
+
+namespace {
+
+// Returns the address of field |f| inside |message|, i.e. |message| advanced
+// by f->offset() bytes.
+void* GetFieldPointer(void *message, const upb::FieldDef* f) {
+  char* base = static_cast<char*>(message);
+  return base + f->offset();
+}
+
+}  // namespace
+
+#ifdef UPB_GOOGLE3
+
+// TODO(haberman): friend upb so that this isn't required.
+// Temporarily expose the protected members of the repeated field classes.
+// The #undef must name the same keyword as the #define; otherwise "protected"
+// stays redefined as "public" for the rest of this translation unit.
+#define protected public
+#include "net/proto2/public/repeated_field.h"
+#undef protected
+
+#define private public
+#include "net/proto/proto2_reflection.h"
+#undef private
+
+#include "net/proto2/proto/descriptor.pb.h"
+#include "net/proto2/public/descriptor.h"
+#include "net/proto2/public/generated_message_reflection.h"
+#include "net/proto2/public/lazy_field.h"
+#include "net/proto2/public/message.h"
+#include "net/proto2/public/string_piece_field_support.h"
+#include "net/proto/internal_layout.h"
+#include "strings/cord.h"
+using ::proto2::Descriptor;
+using ::proto2::EnumDescriptor;
+using ::proto2::EnumValueDescriptor;
+using ::proto2::FieldDescriptor;
+using ::proto2::FieldOptions;
+using ::proto2::FileDescriptor;
+using ::proto2::internal::GeneratedMessageReflection;
+using ::proto2::internal::RepeatedPtrFieldBase;
+using ::proto2::internal::StringPieceField;
+using ::proto2::Message;
+using ::proto2::MessageFactory;
+using ::proto2::Reflection;
+using ::proto2::RepeatedField;
+using ::proto2::RepeatedPtrField;
+
+namespace upb {
+
+static const Message* GetPrototypeForField(const Message& m,
+                                           const FieldDescriptor* f);
+
+namespace proto2_bridge_google3 { class FieldAccessor; }
+
+using ::upb::proto2_bridge_google3::FieldAccessor;
+
+namespace proto2_bridge_google3 {
+
+// Replaces the contents of |cord| with the bytes of |r|, appending one chunk
+// (as handed out by ByteRegion::GetPtr()) at a time.
+static void AssignToCord(const ByteRegion* r, Cord* cord) {
+  // TODO(haberman): ref source data if source is a cord.
+  cord->Clear();
+  for (uint64_t pos = r->start_ofs(); pos < r->end_ofs(); ) {
+    size_t chunk_len;
+    const char *chunk = r->GetPtr(pos, &chunk_len);
+    cord->Append(StringPiece(chunk, chunk_len));
+    pos += chunk_len;
+  }
+}
+
+#else
+
+// TODO(haberman): friend upb so that this isn't required.
+#define protected public
+#include "google/protobuf/repeated_field.h"
+#undef protected
+
+#define private public
+#include "google/protobuf/generated_message_reflection.h"
+#undef private
+
+#include "google/protobuf/descriptor.h"
+#include "google/protobuf/descriptor.pb.h"
+#include "google/protobuf/message.h"
+using ::google::protobuf::Descriptor;
+using ::google::protobuf::EnumDescriptor;
+using ::google::protobuf::EnumValueDescriptor;
+using ::google::protobuf::FieldDescriptor;
+using ::google::protobuf::FieldOptions;
+using ::google::protobuf::FileDescriptor;
+using ::google::protobuf::internal::GeneratedMessageReflection;
+using ::google::protobuf::internal::RepeatedPtrFieldBase;
+using ::google::protobuf::Message;
+using ::google::protobuf::MessageFactory;
+using ::google::protobuf::Reflection;
+using ::google::protobuf::RepeatedField;
+using ::google::protobuf::RepeatedPtrField;
+
+namespace upb {
+static const Message* GetPrototypeForField(const Message& m,
+                                           const FieldDescriptor* f);
+
+namespace proto2_bridge_opensource { class FieldAccessor; }
+
+using ::upb::proto2_bridge_opensource::FieldAccessor;
+
+namespace proto2_bridge_opensource {
+
+#endif  // ifdef UPB_GOOGLE3
+
+// Have to define this manually since older versions of proto2 didn't define
+// an enum value for STRING.
+#define UPB_CTYPE_STRING 0
+
+// The code in this class depends on the internal representation of the proto2
+// generated classes, which is an internal implementation detail of proto2 and
+// is not a public interface.  As a result, this class's implementation may
+// need to be changed if/when proto2 changes its internal representation.  It
+// is intended that this class is the only code that depends on these internal,
+// non-public interfaces.
+//
+// This class only works with messages that use GeneratedMessageReflection.
+// Other reflection classes will need other accessor implementations.
+class FieldAccessor {
+ public:
+  // Tries to configure |upb_f| so that upb can write the proto2 field
+  // |proto2_f| directly into a proto2::Message described by |md|.  On success
+  // this sets the accessor vtable, hasbit and offset of |upb_f|; submessage
+  // fields additionally get their subtype name and prototype, and
+  // non-repeated ctype=STRING string fields get the default string instance
+  // as their prototype (SetString() compares against it).
+  //
+  // Returns true if we were able to set an accessor and any other properties
+  // of the FieldDef that are necessary to read/write this field to a
+  // proto2::Message.  Returns false, without touching |upb_f|, if the
+  // prototype of |md| does not use GeneratedMessageReflection or if
+  // |proto2_f| is an extension.  Note that the accessor that gets installed
+  // is NULL for field kinds GetForFieldDescriptor() does not implement.
+  static bool TrySet(const FieldDescriptor* proto2_f,
+                     const upb::MessageDef* md,
+                     upb::FieldDef* upb_f) {
+    const Message* prototype = static_cast<const Message*>(md->prototype);
+    const Reflection* base_r = prototype->GetReflection();
+    const GeneratedMessageReflection* r =
+        dynamic_cast<const GeneratedMessageReflection*>(base_r);
+    // Old versions of the open-source protobuf release erroneously default to
+    // Cord even though that has never been supported in the open-source
+    // release.
+    int32_t ctype = proto2_f->options().has_ctype() ?
+        proto2_f->options().ctype() : UPB_CTYPE_STRING;
+    if (!r) return false;
+    // Extensions not supported yet.
+    if (proto2_f->is_extension()) return false;
+
+    upb_f->set_accessor(GetForFieldDescriptor(proto2_f, ctype));
+    upb_f->set_hasbit(GetHasbit(proto2_f, r));
+    upb_f->set_offset(GetOffset(proto2_f, r));
+    if (upb_f->IsSubmessage()) {
+      upb_f->set_subtype_name(proto2_f->message_type()->full_name());
+      upb_f->prototype = GetPrototypeForField(*prototype, proto2_f);
+    }
+
+    if (upb_f->IsString() && !upb_f->IsSequence() &&
+        ctype == UPB_CTYPE_STRING) {
+      // Remember the default string so that SetString() can tell when the
+      // field still points at it.
+      upb_f->prototype = &r->GetStringReference(*prototype, proto2_f, NULL);
+    }
+    return true;
+  }
+
+  // Returns the MessageFactory recorded in the GeneratedMessageReflection of
+  // |m|, or NULL if |m| uses some other reflection class.
+  static MessageFactory* GetMessageFactory(const Message& m) {
+    const GeneratedMessageReflection* r =
+        dynamic_cast<const GeneratedMessageReflection*>(m.GetReflection());
+    return r ? r->message_factory_ : NULL;
+  }
+
+ private:
+  // Returns the hasbit for |f|, or -1 if the field has none.
+  static int64_t GetHasbit(const FieldDescriptor* f,
+                           const GeneratedMessageReflection* r) {
+    if (f->is_repeated()) {
+      // proto2 does not store hasbits for repeated fields.
+      return -1;
+    } else {
+      // NOTE(review): presumably has_bits_offset_ is a byte offset, hence the
+      // multiplication by 8 to obtain a bit index -- confirm against proto2.
+      return (r->has_bits_offset_ * 8) + f->index();
+    }
+  }
+
+  // Returns the offset that |r| records for |f| (looked up by field index).
+  static uint16_t GetOffset(const FieldDescriptor* f,
+                            const GeneratedMessageReflection* r) {
+    return r->offsets_[f->index()];
+  }
+
+  // Picks the accessor vtable matching the C++ representation of |f|.
+  // |ctype| selects the string representation for string fields.  Returns
+  // NULL for representations that are not implemented.
+  static AccessorVTable *GetForFieldDescriptor(const FieldDescriptor* f,
+                                               int32_t ctype) {
+    switch (f->cpp_type()) {
+      case FieldDescriptor::CPPTYPE_ENUM:
+        // Should handlers validate enum membership to match proto2?
+        // (Intentional fall-through: enums use the int32 accessor.)
+      case FieldDescriptor::CPPTYPE_INT32: return Get<int32_t>();
+      case FieldDescriptor::CPPTYPE_INT64: return Get<int64_t>();
+      case FieldDescriptor::CPPTYPE_UINT32: return Get<uint32_t>();
+      case FieldDescriptor::CPPTYPE_UINT64: return Get<uint64_t>();
+      case FieldDescriptor::CPPTYPE_DOUBLE: return Get<double>();
+      case FieldDescriptor::CPPTYPE_FLOAT: return Get<float>();
+      case FieldDescriptor::CPPTYPE_BOOL: return Get<bool>();
+      case FieldDescriptor::CPPTYPE_STRING:
+        switch (ctype) {
+#ifdef UPB_GOOGLE3
+          case FieldOptions::STRING:
+            return GetForString<string>();
+          case FieldOptions::CORD:
+            return GetForCord();
+          case FieldOptions::STRING_PIECE:
+            return GetForStringPiece();
+#else
+          case UPB_CTYPE_STRING:
+            return GetForString<std::string>();
+#endif
+          default: return NULL;
+        }
+      case FieldDescriptor::CPPTYPE_MESSAGE:
+#ifdef UPB_GOOGLE3
+        if (f->options().lazy()) {
+          return NULL;  // Not yet implemented.
+        } else {
+          return GetForMessage();
+        }
+#else
+        return GetForMessage();
+#endif
+      default: return NULL;
+    }
+  }
+
+  // PushOffset handler (used for StartSequence and others)  ///////////////////
+
+  // Continues with a pointer to the field's storage inside message |m| as the
+  // closure; |fval| carries the FieldDef.
+  static SubFlow PushOffset(void *m, Value fval) {
+    const FieldDef *f = GetValue<const FieldDef*>(fval);
+    return UPB_CONTINUE_WITH(GetFieldPointer(m, f));
+  }
+
+  // Primitive Value (numeric, enum, bool) /////////////////////////////////////
+
+  // Returns the accessor vtable for primitive fields stored as C++ type T.
+  // The initializer is positional, so the order of the entries matters.
+  template <typename T> static AccessorVTable *Get() {
+    static upb_accessor_vtbl vtbl = {
+      NULL,  // StartSubMessage handler
+      GetValueHandler<T>(),
+      &PushOffset,  // StartSequence handler
+      NULL,  // StartRepeatedSubMessage handler
+      &Append<T>,
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  // Appends |val| to the RepeatedField<T> passed as closure |_r|.
+  template <typename T>
+  static Flow Append(void *_r, Value fval, Value val) {
+    (void)fval;
+    RepeatedField<T>* r = static_cast<RepeatedField<T>*>(_r);
+    r->Add(GetValue<T>(val));
+    return UPB_CONTINUE;
+  }
+
+  // String ////////////////////////////////////////////////////////////////////
+
+  // Returns the accessor vtable for string fields stored as string type T.
+  template <typename T> static AccessorVTable *GetForString() {
+    static upb_accessor_vtbl vtbl = {
+      NULL,  // StartSubMessage handler
+      &SetString<T>,
+      &PushOffset,  // StartSequence handler
+      NULL,  // StartRepeatedSubMessage handler
+      &AppendString<T>,
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  // Copies the ByteRegion in |val| into the non-repeated string field of |m|
+  // described by |fval|.
+  // This needs to be templated because google3 string is not std::string.
+  template <typename T> static Flow SetString(void *m, Value fval, Value val) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    T **str = static_cast<T**>(GetFieldPointer(m, f));
+    // If it points to the default instance, we must create a new instance.
+    if (*str == f->prototype) *str = new T();
+    GetValue<ByteRegion*>(val)->AssignToString(*str);
+    return UPB_CONTINUE;
+  }
+
+  // Adds a new element to the RepeatedPtrField<T> passed as closure |_r| and
+  // copies the ByteRegion in |val| into it.
+  template <typename T>
+  static Flow AppendString(void *_r, Value fval, Value val) {
+    (void)fval;
+    RepeatedPtrField<T>* r = static_cast<RepeatedPtrField<T>*>(_r);
+    GetValue<ByteRegion*>(val)->AssignToString(r->Add());
+    return UPB_CONTINUE;
+  }
+
+  // SubMessage ////////////////////////////////////////////////////////////////
+
+  // Returns the accessor vtable for (possibly repeated) submessage fields.
+  static AccessorVTable *GetForMessage() {
+    static upb_accessor_vtbl vtbl = {
+      &StartSubMessage,
+      NULL,  // Value handler
+      &PushOffset,  // StartSequence handler
+      &StartRepeatedSubMessage,
+      NULL,  // Repeated value handler
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  // Continues with the submessage stored in the field of |m| described by
+  // |fval|, first creating it from the field's prototype if the slot is NULL
+  // or still points at the prototype itself.
+  static SubFlow StartSubMessage(void *m, Value fval) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    void **subm = static_cast<void**>(GetFieldPointer(m, f));
+    if (*subm == NULL || *subm == f->prototype) {
+      const Message* prototype = static_cast<const Message*>(f->prototype);
+      *subm = prototype->New();
+    }
+    return UPB_CONTINUE_WITH(*subm);
+  }
+
+  // Type handler that lets us drive RepeatedPtrFieldBase without knowing the
+  // concrete submessage type: elements are handled as void*.
+  class RepeatedMessageTypeHandler {
+   public:
+    typedef void Type;
+    // AddAllocated() calls this, but only if other objects are sitting
+    // around waiting for reuse, which we will not do.
+    static void Delete(Type* t) {
+      (void)t;
+      assert(false);
+    }
+  };
+
+  // Continues with a submessage appended to the repeated field |_r|: a
+  // previously cleared element is reused if one is available, otherwise a new
+  // one is created from the field's prototype and added.
+  //
+  // Closure is a RepeatedPtrField<SubMessageType>*, but we access it through
+  // its base class RepeatedPtrFieldBase*.
+  static SubFlow StartRepeatedSubMessage(void* _r, Value fval) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    RepeatedPtrFieldBase *r = static_cast<RepeatedPtrFieldBase*>(_r);
+    void *submsg = r->AddFromCleared<RepeatedMessageTypeHandler>();
+    if (!submsg) {
+      const Message* prototype = static_cast<const Message*>(f->prototype);
+      submsg = prototype->New();
+      r->AddAllocated<RepeatedMessageTypeHandler>(submsg);
+    }
+    return UPB_CONTINUE_WITH(submsg);
+  }
+
+  // TODO(haberman): handle Extensions, Unknown Fields.
+
+#ifdef UPB_GOOGLE3
+  // Handlers for types/features only included in internal proto2 release:
+  // Cord, StringPiece, LazyField, and MessageSet.
+  // TODO(haberman): LazyField, MessageSet.
+
+  // Cord //////////////////////////////////////////////////////////////////////
+
+  // Returns the accessor vtable for string fields stored as Cord.
+  static AccessorVTable *GetForCord() {
+    static upb_accessor_vtbl vtbl = {
+      NULL,  // StartSubMessage handler
+      &SetCord,
+      &PushOffset,  // StartSequence handler
+      NULL,  // StartRepeatedSubMessage handler
+      &AppendCord,
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  // Assigns the ByteRegion in |val| to the Cord field of |m| described by
+  // |fval|.
+  static Flow SetCord(void *m, Value fval, Value val) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    Cord* field = static_cast<Cord*>(GetFieldPointer(m, f));
+    AssignToCord(GetValue<ByteRegion*>(val), field);
+    return UPB_CONTINUE;
+  }
+
+  // Adds a new Cord to the RepeatedField<Cord> passed as closure |_r| and
+  // assigns the ByteRegion in |val| to it.
+  static Flow AppendCord(void *_r, Value fval, Value val) {
+    (void)fval;  // Unused, as in the other Append* handlers.
+    RepeatedField<Cord>* r = static_cast<RepeatedField<Cord>*>(_r);
+    AssignToCord(GetValue<ByteRegion*>(val), r->Add());
+    return UPB_CONTINUE;
+  }
+
+  // StringPiece ///////////////////////////////////////////////////////////////
+
+  // Returns the accessor vtable for string fields stored as StringPieceField.
+  static AccessorVTable *GetForStringPiece() {
+    static upb_accessor_vtbl vtbl = {
+      NULL,  // StartSubMessage handler
+      &SetStringPiece,
+      &PushOffset,  // StartSequence handler
+      NULL,  // StartRepeatedSubMessage handler
+      &AppendStringPiece,
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  // Copies the bytes of |r| into |f| by way of a temporary heap buffer.
+  static void AssignToStringPieceField(const ByteRegion* r,
+                                       proto2::internal::StringPieceField* f) {
+    // TODO(haberman): alias if possible and enabled on the input stream.
+    // TODO(haberman): add a method to StringPieceField that lets us avoid
+    // this copy/malloc/free.
+    char *data = new char[r->Length()];
+    r->Copy(r->start_ofs(), r->Length(), data);
+    f->CopyFrom(StringPiece(data, r->Length()));
+    delete[] data;
+  }
+
+  // Assigns the ByteRegion in |val| to the StringPieceField of |m| described
+  // by |fval|.
+  static Flow SetStringPiece(void *m, Value fval, Value val) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    StringPieceField* field =
+        static_cast<StringPieceField*>(GetFieldPointer(m, f));
+    AssignToStringPieceField(GetValue<ByteRegion*>(val), field);
+    return UPB_CONTINUE;
+  }
+
+  // Adds a new element to the RepeatedPtrField<StringPieceField> passed as
+  // closure |_r| and assigns the ByteRegion in |val| to it.
+  static Flow AppendStringPiece(void* _r, Value fval, Value val) {
+    (void)fval;  // Unused, as in the other Append* handlers.
+    RepeatedPtrField<StringPieceField>* r =
+        static_cast<RepeatedPtrField<StringPieceField>*>(_r);
+    AssignToStringPieceField(GetValue<ByteRegion*>(val), r->Add());
+    return UPB_CONTINUE;
+  }
+
+#endif  // UPB_GOOGLE3
+};
+
+#ifdef UPB_GOOGLE3
+
+// Proto1 accessor -- only needed inside Google.
+class Proto1FieldAccessor {
+ public:
+  // Tries to configure |upb_f| so that upb can write the field |proto2_f|
+  // directly into a proto1 message described by |md|: sets the accessor
+  // vtable, hasbit and offset, and for submessage fields the subtype name
+  // and prototype.  Weak fields have their upb type changed to MESSAGE.
+  //
+  // Returns true if we were able to set an accessor and any other properties
+  // of the FieldDef that are necessary to read/write this field to a
+  // proto2::Message.  Returns false, without touching |upb_f|, if the
+  // prototype of |md| does not use _pi::Proto2Reflection or if |proto2_f| is
+  // an extension.
+  static bool TrySet(const FieldDescriptor* proto2_f,
+                     const upb::MessageDef* md,
+                     upb::FieldDef* upb_f) {
+    const Message* m = static_cast<const Message*>(md->prototype);
+    const proto2::Reflection* base_r = m->GetReflection();
+    const _pi::Proto2Reflection* r =
+        dynamic_cast<const _pi::Proto2Reflection*>(base_r);
+    if (!r) return false;
+    // Extensions not supported yet.
+    if (proto2_f->is_extension()) return false;
+
+    const _pi::Field* f = r->GetFieldLayout(proto2_f);
+
+    if (f->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK) {
+      // Override the BYTES type that proto2 descriptors have for weak fields.
+      upb_f->set_type(UPB_TYPE(MESSAGE));
+    }
+
+    if (upb_f->IsSubmessage()) {
+      const Message* prototype = upb::GetPrototypeForField(*m, proto2_f);
+      upb_f->set_subtype_name(prototype->GetDescriptor()->full_name());
+      upb_f->prototype = prototype;
+    }
+
+    upb_f->set_accessor(GetForCrep(f->crep));
+    upb_f->set_hasbit(GetHasbit(proto2_f, r));
+    upb_f->set_offset(GetOffset(proto2_f, r));
+    return true;
+  }
+
+ private:
+  // Returns the hasbit for |f|, or -1 if the field has none.
+  static int16_t GetHasbit(const FieldDescriptor* f,
+                           const _pi::Proto2Reflection* r) {
+    if (f->is_repeated()) {
+      // proto1 does not store hasbits for repeated fields.
+      return -1;
+    } else {
+      // NOTE(review): presumably has_bit_offset is a byte offset, hence the
+      // multiplication by 8 to obtain a bit index -- confirm against proto1.
+      return (r->layout_->has_bit_offset * 8) + r->GetFieldLayout(f)->has_index;
+    }
+  }
+
+  // Returns the offset recorded in the field layout of |f|.
+  static uint16_t GetOffset(const FieldDescriptor* f,
+                            const _pi::Proto2Reflection* r) {
+    return r->GetFieldLayout(f)->offset;
+  }
+
+  // Picks the accessor vtable matching the proto1 C++ representation |crep|.
+  // Unknown representations assert and (with asserts disabled) yield NULL.
+  static AccessorVTable *GetForCrep(int crep) {
+    // Maps the required/optional/repeated variants of one primitive
+    // representation onto the accessor for |type_name|.
+#define PRIMITIVE(name, type_name) \
+    case _pi::CREP_REQUIRED_ ## name: \
+    case _pi::CREP_OPTIONAL_ ## name: \
+    case _pi::CREP_REPEATED_ ## name: return Get<type_name>();
+
+    switch (crep) {
+      PRIMITIVE(DOUBLE,   double);
+      PRIMITIVE(FLOAT,    float);
+      PRIMITIVE(INT64,    int64_t);
+      PRIMITIVE(UINT64,   uint64_t);
+      PRIMITIVE(INT32,    int32_t);
+      PRIMITIVE(FIXED64,  uint64_t);
+      PRIMITIVE(FIXED32,  uint32_t);
+      PRIMITIVE(BOOL,     bool);
+      case _pi::CREP_REQUIRED_STRING:
+      case _pi::CREP_OPTIONAL_STRING:
+      case _pi::CREP_REPEATED_STRING: return GetForString();
+      case _pi::CREP_OPTIONAL_OUTOFLINE_STRING: return GetForOutOfLineString();
+      case _pi::CREP_REQUIRED_CORD:
+      case _pi::CREP_OPTIONAL_CORD:
+      case _pi::CREP_REPEATED_CORD: return GetForCord();
+      case _pi::CREP_REQUIRED_GROUP:
+      case _pi::CREP_REQUIRED_FOREIGN:
+      case _pi::CREP_REQUIRED_FOREIGN_PROTO2: return GetForRequiredMessage();
+      case _pi::CREP_OPTIONAL_GROUP:
+      case _pi::CREP_REPEATED_GROUP:
+      case _pi::CREP_OPTIONAL_FOREIGN:
+      case _pi::CREP_REPEATED_FOREIGN:
+      case _pi::CREP_OPTIONAL_FOREIGN_PROTO2:
+      case _pi::CREP_REPEATED_FOREIGN_PROTO2: return GetForMessage();
+      case _pi::CREP_OPTIONAL_FOREIGN_WEAK: return GetForWeakMessage();
+      default: assert(false); return NULL;
+    }
+#undef PRIMITIVE
+  }
+
+  // PushOffset handler (used for StartSequence and others)  ///////////////////
+
+  // Continues with a pointer to the field's storage inside message |m| as the
+  // closure; |fval| carries the FieldDef.
+  // We can find a RepeatedField* or a RepeatedPtrField* at f->offset().
+  static SubFlow PushOffset(void *m, Value fval) {
+    const FieldDef *f = GetValue<const FieldDef*>(fval);
+    return UPB_CONTINUE_WITH(GetFieldPointer(m, f));
+  }
+
+  // Primitive Value (numeric, enum, bool) /////////////////////////////////////
+
+  // Returns the accessor vtable for primitive fields stored as C++ type T.
+  // The initializer is positional, so the order of the entries matters.
+  template <typename T> static AccessorVTable *Get() {
+    static upb_accessor_vtbl vtbl = {
+      NULL,  // StartSubMessage handler
+      GetValueHandler<T>(),
+      &PushOffset,  // StartSequence handler
+      NULL,  // StartRepeatedSubMessage handler
+      &Append<T>,
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  // Appends |val| to the repeated field passed as closure |_r|.
+  template <typename T>
+  static Flow Append(void *_r, Value fval, Value val) {
+    (void)fval;
+    // Proto1's ProtoArray class derives from RepeatedField.
+    RepeatedField<T>* r = static_cast<RepeatedField<T>*>(_r);
+    r->Add(GetValue<T>(val));
+    return UPB_CONTINUE;
+  }
+
+  // String ////////////////////////////////////////////////////////////////////
+
+  // Returns the accessor vtable for string fields stored inline in the
+  // message.
+  static AccessorVTable *GetForString() {
+    static upb_accessor_vtbl vtbl = {
+      NULL,  // StartSubMessage handler
+      &SetString,
+      &PushOffset,  // StartSequence handler
+      NULL,  // StartRepeatedSubMessage handler
+      &AppendString,
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  // Copies the ByteRegion in |val| into the string located at the field's
+  // offset inside |m|.
+  static Flow SetString(void *m, Value fval, Value val) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    string *str = static_cast<string*>(GetFieldPointer(m, f));
+    GetValue<ByteRegion*>(val)->AssignToString(str);
+    return UPB_CONTINUE;
+  }
+
+  // Adds a new element to the RepeatedPtrField<string> passed as closure |_r|
+  // and copies the ByteRegion in |val| into it.
+  static Flow AppendString(void *_r, Value fval, Value val) {
+    (void)fval;
+    RepeatedPtrField<string>* r = static_cast<RepeatedPtrField<string>*>(_r);
+    GetValue<ByteRegion*>(val)->AssignToString(r->Add());
+    return UPB_CONTINUE;
+  }
+
+  // Out-of-line string ////////////////////////////////////////////////////////
+
+  // Returns the accessor vtable for string fields stored through a pointer.
+  static AccessorVTable *GetForOutOfLineString() {
+    static upb_accessor_vtbl vtbl = {
+      NULL, &SetOutOfLineString,
+      // This type is only used for non-repeated string fields.
+      NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  // Copies the ByteRegion in |val| into the string pointed to by the field of
+  // |m|, allocating a fresh string first if the field still points at the
+  // shared empty string.
+  static Flow SetOutOfLineString(void *m, Value fval, Value val) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    string **str = static_cast<string**>(GetFieldPointer(m, f));
+    if (*str == &::ProtocolMessage::___empty_internal_proto_string_)
+      *str = new string();
+    GetValue<ByteRegion*>(val)->AssignToString(*str);
+    return UPB_CONTINUE;
+  }
+
+  // Cord //////////////////////////////////////////////////////////////////////
+
+  // Returns the accessor vtable for string fields stored as Cord.
+  static AccessorVTable *GetForCord() {
+    static upb_accessor_vtbl vtbl = {
+      NULL,  // StartSubMessage handler
+      &SetCord,
+      &PushOffset,  // StartSequence handler
+      NULL,  // StartRepeatedSubMessage handler
+      &AppendCord,
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  // Assigns the ByteRegion in |val| to the Cord field of |m| described by
+  // |fval|.
+  static Flow SetCord(void *m, Value fval, Value val) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    Cord* field = static_cast<Cord*>(GetFieldPointer(m, f));
+    AssignToCord(GetValue<ByteRegion*>(val), field);
+    return UPB_CONTINUE;
+  }
+
+  // Adds a new Cord to the RepeatedField<Cord> passed as closure |_r| and
+  // assigns the ByteRegion in |val| to it.
+  static Flow AppendCord(void *_r, Value fval, Value val) {
+    (void)fval;  // Unused, as in the other Append* handlers.
+    RepeatedField<Cord>* r = static_cast<RepeatedField<Cord>*>(_r);
+    AssignToCord(GetValue<ByteRegion*>(val), r->Add());
+    return UPB_CONTINUE;
+  }
+
+  // SubMessage ////////////////////////////////////////////////////////////////
+
+  // Returns the accessor vtable for required submessages/groups.  Its
+  // StartSubMessage handler is PushOffset, i.e. the closure becomes the
+  // address of the field itself.
+  static AccessorVTable *GetForRequiredMessage() {
+    static upb_accessor_vtbl vtbl = {
+      &PushOffset,  // StartSubMessage handler
+      NULL,  // Value handler
+      &PushOffset,  // StartSequence handler
+      &StartRepeatedSubMessage,
+      NULL,  // Repeated value handler
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  // Returns the accessor vtable for weak submessage fields.
+  static AccessorVTable *GetForWeakMessage() {
+    static upb_accessor_vtbl vtbl = {
+      &StartWeakSubMessage,  // StartSubMessage handler
+      NULL,  // Value handler
+      &PushOffset,  // StartSequence handler
+      &StartRepeatedSubMessage,
+      NULL,  // Repeated value handler
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  // Returns the accessor vtable for optional and repeated submessages/groups.
+  static AccessorVTable *GetForMessage() {
+    static upb_accessor_vtbl vtbl = {
+      &StartSubMessage,
+      NULL,  // Value handler
+      &PushOffset,  // StartSequence handler
+      &StartRepeatedSubMessage,
+      NULL,  // Repeated value handler
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  // Continues with the submessage stored in the field of |m| described by
+  // |fval|, replacing it with a new instance first if the slot still points
+  // at the field's prototype.
+  static SubFlow StartSubMessage(void *m, Value fval) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    Message **subm = static_cast<Message**>(GetFieldPointer(m, f));
+    if (*subm == f->prototype) *subm = (*subm)->New();
+    return UPB_CONTINUE_WITH(*subm);
+  }
+
+  // Like StartSubMessage(), but for weak fields: the slot is NULL while
+  // unset, so the new instance is created from the field's prototype.
+  static SubFlow StartWeakSubMessage(void *m, Value fval) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    Message **subm = static_cast<Message**>(GetFieldPointer(m, f));
+    if (*subm == NULL) {
+      const Message* prototype = static_cast<const Message*>(f->prototype);
+      *subm = prototype->New();
+    }
+    return UPB_CONTINUE_WITH(*subm);
+  }
+
+  // Type handler that lets us drive RepeatedPtrFieldBase without knowing the
+  // concrete submessage type: elements are handled as void*.
+  class RepeatedMessageTypeHandler {
+   public:
+    typedef void Type;
+    // AddAllocated() calls this, but only if other objects are sitting
+    // around waiting for reuse, which we will not do.
+    static void Delete(Type* t) {
+      (void)t;
+      assert(false);
+    }
+  };
+
+  // Continues with a submessage appended to the repeated field |_r|: a
+  // previously cleared element is reused if one is available, otherwise a new
+  // one is created from the field's prototype and added.
+  //
+  // Closure is a RepeatedPtrField<SubMessageType>*, but we access it through
+  // its base class RepeatedPtrFieldBase*.
+  static SubFlow StartRepeatedSubMessage(void* _r, Value fval) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    RepeatedPtrFieldBase *r = static_cast<RepeatedPtrFieldBase*>(_r);
+    void *submsg = r->AddFromCleared<RepeatedMessageTypeHandler>();
+    if (!submsg) {
+      const Message* prototype = static_cast<const Message*>(f->prototype);
+      submsg = prototype->New();
+      r->AddAllocated<RepeatedMessageTypeHandler>(submsg);
+    }
+    return UPB_CONTINUE_WITH(submsg);
+  }
+};
+
+#endif
+
+}  // namespace proto2_bridge_{google3,opensource}
+
+// Returns the prototype instance for the type of |m|, obtained from the
+// MessageFactory that |m| belongs to.
+static const Message* GetPrototypeForMessage(const Message& m) {
+  // A non-NULL factory means a proto2 generated message or DynamicMessage.
+  // Otherwise this is a proto1 message; since proto1 has no dynamic message,
+  // it must be from the generated factory.
+  MessageFactory* factory = FieldAccessor::GetMessageFactory(m);
+  if (!factory) factory = MessageFactory::generated_factory();
+
+  const Message* prototype = factory->GetPrototype(m.GetDescriptor());
+  // If NULL on the proto1 path, then it wasn't a proto1 message and we can't
+  // handle it.
+  assert(prototype);
+  assert(prototype->GetReflection() == m.GetReflection());
+  return prototype;
+}
+
+// Returns the prototype instance for the submessage type of field |f| of
+// message |m|.
+static const Message* GetPrototypeForField(const Message& m,
+                                           const FieldDescriptor* f) {
+#ifdef UPB_GOOGLE3
+  const _pi::Proto2Reflection* proto1_r =
+      dynamic_cast<const _pi::Proto2Reflection*>(m.GetReflection());
+  if (f->type() == FieldDescriptor::TYPE_BYTES) {
+    // Proto1 weak field: the proto2 descriptor says their type is BYTES.
+    assert(proto1_r);
+    const _pi::Field* layout = proto1_r->GetFieldLayout(f);
+    assert(layout->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK);
+    const Message* weak_default =
+        static_cast<const Message*>(layout->weak_layout()->default_instance);
+    return GetPrototypeForMessage(*weak_default);
+  }
+  if (proto1_r) {
+    // Proto1 message; since proto1 has no dynamic message, it must be from
+    // the generated factory.
+    const Message* proto1_prototype =
+        MessageFactory::generated_factory()->GetPrototype(f->message_type());
+    assert(proto1_prototype);
+    return proto1_prototype;
+  }
+#endif
+  assert(f->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE);
+  // We assume that all submessages (and extensions) will be constructed using
+  // the same MessageFactory as this message.  This doesn't cover the case of
+  // CodedInputStream::SetExtensionRegistry().
+  MessageFactory* factory = FieldAccessor::GetMessageFactory(m);
+  assert(factory);  // If neither proto1 nor proto2 we can't handle it.
+  const Message* prototype = factory->GetPrototype(f->message_type());
+  assert(prototype);
+  return prototype;
+}
+
+namespace proto2_bridge {
+
+// Creates a upb::FieldDef mirroring the proto2 field |f| (number, name,
+// label, type, accessor and default value) and adds it to |md|.  Returns the
+// new FieldDef, or NULL if md->AddField() reports failure.
+upb::FieldDef* AddFieldDef(const FieldDescriptor* f, upb::MessageDef* md) {
+  // The address of the local pointer doubles as the owner token for the new
+  // def until it is handed to |md| below.
+  upb::FieldDef* fielddef = upb::FieldDef::New(&fielddef);
+  fielddef->set_number(f->number());
+  fielddef->set_name(f->name());
+  fielddef->set_label(static_cast<upb::Label>(f->label()));
+  fielddef->set_type(static_cast<upb::FieldType>(f->type()));
+
+  bool have_accessor = FieldAccessor::TrySet(f, md, fielddef);
+#ifdef UPB_GOOGLE3
+  if (!have_accessor) {
+    have_accessor =
+        proto2_bridge_google3::Proto1FieldAccessor::TrySet(f, md, fielddef);
+  }
+#endif
+  // If nothing accepted the field: unsupported reflection class.
+  assert(have_accessor);
+  (void)have_accessor;
+
+  const bool is_enum = (fielddef->type() == UPB_TYPE(ENUM));
+  if (!is_enum) {
+    // Set field default for primitive types.  Need to switch on the upb type
+    // rather than the proto2 type, because fielddef->type() may have been
+    // changed from BYTES to MESSAGE for a weak field.
+    switch (upb_types[fielddef->type()].inmemory_type) {
+      case UPB_CTYPE_INT32:
+        fielddef->set_default(MakeValue(f->default_value_int32()));
+        break;
+      case UPB_CTYPE_INT64:
+        fielddef->set_default(
+            MakeValue(static_cast<int64_t>(f->default_value_int64())));
+        break;
+      case UPB_CTYPE_UINT32:
+        fielddef->set_default(MakeValue(f->default_value_uint32()));
+        break;
+      case UPB_CTYPE_UINT64:
+        fielddef->set_default(
+            MakeValue(static_cast<uint64_t>(f->default_value_uint64())));
+        break;
+      case UPB_CTYPE_DOUBLE:
+        fielddef->set_default(MakeValue(f->default_value_double()));
+        break;
+      case UPB_CTYPE_FLOAT:
+        fielddef->set_default(MakeValue(f->default_value_float()));
+        break;
+      case UPB_CTYPE_BOOL:
+        fielddef->set_default(MakeValue(f->default_value_bool()));
+        break;
+      case UPB_CTYPE_BYTEREGION:
+        fielddef->set_default(f->default_value_string());
+        break;
+    }
+  } else {
+    // We set the enum default symbolically.
+    fielddef->set_default(f->default_value_enum()->name());
+    fielddef->set_subtype_name(f->enum_type()->full_name());
+  }
+
+  if (!md->AddField(fielddef, &fielddef)) return NULL;
+  return fielddef;
+}
+
+// Creates a MessageDef (owned by |owner|) that carries the full name and the
+// prototype for the type of |m| but has no fields yet.
+upb::MessageDef *NewEmptyMessageDef(const Message& m, void *owner) {
+  upb::MessageDef *empty_def = upb::MessageDef::New(owner);
+  const Descriptor* descriptor = m.GetDescriptor();
+  empty_def->set_full_name(descriptor->full_name());
+  empty_def->prototype = GetPrototypeForMessage(m);
+  return empty_def;
+}
+
+// Creates an EnumDef (owned by |owner|) with the full name of |desc| and one
+// value per value of |desc|.  Failure to add a value trips an assert.
+upb::EnumDef* NewEnumDef(const EnumDescriptor* desc, void *owner) {
+  upb::EnumDef* enum_def = upb::EnumDef::New(owner);
+  enum_def->set_full_name(desc->full_name());
+  for (int idx = 0; idx < desc->value_count(); ++idx) {
+    const EnumValueDescriptor* value = desc->value(idx);
+    const bool added = enum_def->AddValue(value->name(), value->number());
+    assert(added);
+    (void)added;
+  }
+  return enum_def;
+}
+
+// Adds a FieldDef to |md| for every field in the descriptor of its prototype
+// message, except for the kinds of fields that are skipped below.
+void AddAllFields(upb::MessageDef* md) {
+  const Message* prototype = static_cast<const Message*>(md->prototype);
+  const Descriptor* d = prototype->GetDescriptor();
+  for (int idx = 0; idx < d->field_count(); ++idx) {
+    const FieldDescriptor* field = d->field(idx);
+#ifdef UPB_GOOGLE3
+    // Skip lazy fields for now since we can't properly handle them.
+    if (field->options().lazy()) continue;
+#endif
+    // Extensions not supported yet.
+    if (field->is_extension()) continue;
+    AddFieldDef(field, md);
+  }
+}
+
+// Creates a MessageDef (owned by |owner|) for the type of |m| and fills in
+// all of its fields via AddAllFields().
+upb::MessageDef *NewFullMessageDef(const Message& m, void *owner) {
+  upb::MessageDef* full_def = NewEmptyMessageDef(m, owner);
+  // TODO(haberman): add unknown field handler and extensions.
+  AddAllFields(full_def);
+  return full_def;
+}
+
+typedef std::map<std::string, upb::Def*> SymbolMap;
+
+// Builds the full MessageDef for |m| and, recursively, the defs for every
+// enum and submessage type its fields refer to, wiring each field to its
+// subdef.  Every message and enum def created is recorded in |symbols| under
+// its full name, and defs already present there are reused.
+static upb::MessageDef* NewFinalMessageDefHelper(const Message& m, void *owner,
+                                                 SymbolMap* symbols) {
+  upb::MessageDef* md = NewFullMessageDef(m, owner);
+  // Must do this before processing submessages to prevent infinite recursion.
+  (*symbols)[std::string(md->full_name())] = md->AsDef();
+
+  for (upb::MessageDef::Iterator it(md); !it.Done(); it.Next()) {
+    upb::FieldDef* field = it.field();
+    if (!field->HasSubDef()) continue;
+
+    // Reuse the subdef if its type has been seen before.
+    SymbolMap::iterator known = symbols->find(field->subtype_name());
+    if (known != symbols->end()) {
+      field->set_subdef(known->second);
+      continue;
+    }
+
+    const FieldDescriptor* proto2_f =
+        m.GetDescriptor()->FindFieldByNumber(field->number());
+    upb::Def* subdef;
+    if (field->type() == UPB_TYPE(ENUM)) {
+      subdef = NewEnumDef(proto2_f->enum_type(), owner)->AsDef();
+      (*symbols)[std::string(subdef->full_name())] = subdef;
+    } else {
+      assert(field->IsSubmessage());
+      // The recursive call registers the new def in |symbols| itself.
+      const Message* prototype = GetPrototypeForField(m, proto2_f);
+      subdef = NewFinalMessageDefHelper(*prototype, owner, symbols)->AsDef();
+    }
+    field->set_subdef(subdef);
+  }
+  return md;
+}
+
+// Builds the MessageDef for the type of |m| together with all defs it
+// refers to, finalizes them as a group, and returns the top-level def, which
+// stays referenced by |owner|.
+const upb::MessageDef* NewFinalMessageDef(const Message& m, void *owner) {
+  SymbolMap symbols;
+  upb::MessageDef* ret = NewFinalMessageDefHelper(m, owner, &symbols);
+
+  // Finalize defs.
+  std::vector<upb::Def*> defs;
+  for (SymbolMap::iterator sym = symbols.begin(); sym != symbols.end();
+       ++sym) {
+    defs.push_back(sym->second);
+  }
+  Status status;
+  const bool finalized = Def::Finalize(defs, &status);
+  assert(finalized);
+  (void)finalized;
+
+  // Unref all defs except the top-level one that we are returning.
+  typedef std::vector<upb::Def*>::iterator DefIterator;
+  for (DefIterator def = defs.begin(); def != defs.end(); ++def) {
+    if (*def != ret->AsDef()) (*def)->Unref(owner);
+  }
+
+  return ret;
+}
+
+}  // namespace proto2_bridge
+}  // namespace upb
--- a/bindings/cpp/upb/proto2_bridge.hpp
+++ b/bindings/cpp/upb/proto2_bridge.hpp
@ -0,0 +1,170 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+//
+// A bridge between upb and proto2, allows populating proto2 generated
+// classes using upb's parser, translating between descriptors and defs, etc.
+//
+// This is designed to be able to be compiled against either the open-source
+// version of protocol buffers or the Google-internal proto2.  The two are
+// the same in most ways, but live in different namespaces (proto2 vs
+// google::protobuf) and have a few other more minor differences.
+//
+// The bridge gives you a lot of control over which fields will be written to
+// the message (fields that are not written will just be skipped), and whether
+// unknown fields are written to the UnknownFieldSet.  This can save a lot of
+// work if the client only cares about some subset of the fields.
+//
+// Example usage:
+//
+//   // Build a def that will have all fields and parse just like proto2 would.
+//   const upb::MessageDef* md =
+//       upb::proto2_bridge::NewFinalMessageDef(MyProto(), &md);
+//
+//   // JIT the parser; should only be done once ahead-of-time.
+//   upb::Handlers* handlers = upb::NewHandlersForMessage(md);
+//   upb::DecoderPlan* plan = upb::DecoderPlan::New(handlers);
+//   handlers->Unref();
+//
+//   // The actual parsing.
+//   MyProto proto;
+//   upb::Decoder decoder;
+//   upb::StringSource source(buf, len);
+//   decoder.ResetPlan(plan, 0);
+//   decoder.ResetInput(source.AllBytes(), &proto);
+//   CHECK(decoder.Decode() == UPB_OK) << decoder.status();
+//
+// To parse only one field and skip all others:
+//
+//   const upb::MessageDef* md =
+//       upb::proto2_bridge::NewEmptyMessageDef(MyProto().GetPrototype());
+//   upb::proto2_bridge::AddFieldDef(
+//       MyProto::descriptor()->FindFieldByName("my_field"), md);
+//   upb::Finalize(md);
+//
+//   // Now continue with "JIT the parser" from above.
+//
+// Note that there is currently no support for
+// CodedInputStream::SetExtensionRegistry(), which allows specifying a separate
+// DescriptorPool and MessageFactory for extensions.  Since this is a property
+// of the input in proto2, it's difficult to build a plan ahead-of-time that
+// can properly support this.  If it's an important use case, the caller should
+// probably build a upb plan explicitly.
+
+#ifndef UPB_PROTO2_BRIDGE
+#define UPB_PROTO2_BRIDGE
+
+#include <vector>
+
+namespace google {
+namespace protobuf {
+class Descriptor;
+class EnumDescriptor;
+class FieldDescriptor;
+class FileDescriptor;
+class Message;
+}  // namespace protobuf
+}  // namespace google
+
+namespace proto2 {
+class Descriptor;
+class EnumDescriptor;
+class FieldDescriptor;
+class FileDescriptor;
+class Message;
+}  // namespace proto2
+
+
+namespace upb {
+
+// Forward declarations of the upb def types named in the signatures below.
+// EnumDef is required by NewEnumDef(); without it this header does not compile
+// unless another upb header happens to be included first.
+class Def;
+class EnumDef;
+class FieldDef;
+class MessageDef;
+
+namespace proto2_bridge {
+
+// Unfinalized defs ////////////////////////////////////////////////////////////
+
+// Creation of UNFINALIZED defs.  All of these functions return defs that are
+// still mutable and have not been finalized.  They must be finalized before
+// using them to parse anything.  This is useful if you want more control over
+// the process of constructing defs, eg. to add the specific set of fields you
+// care about.
+
+// Creates a new upb::MessageDef that corresponds to the type in the given
+// prototype message.  The MessageDef will not have any fields added to it.
+upb::MessageDef *NewEmptyMessageDef(const proto2::Message& m, void *owner);
+upb::MessageDef *NewEmptyMessageDef(const google::protobuf::Message& desc,
+                                    void *owner);
+
+// Adds a new upb::FieldDef to the given MessageDef corresponding to the given
+// FieldDescriptor.  The FieldDef will be given an accessor and offset so that
+// it can be used to read and write data into the proto2::Message classes.
+// The given MessageDef must have been constructed with NewEmptyMessageDef()
+// and f->containing_type() must correspond to the message that was used.
+//
+// Any submessage, group, or enum fields will be given symbolic references to
+// the subtype, which must be resolved before the MessageDef can be finalized.
+//
+// On success, returns the FieldDef that was added (caller does not own a ref).
+// If an existing field had the same name or number, returns NULL.
+upb::FieldDef* AddFieldDef(const proto2::FieldDescriptor* f,
+                           upb::MessageDef* md);
+upb::FieldDef* AddFieldDef(const google::protobuf::FieldDescriptor* f,
+                           upb::MessageDef* md);
+
+// Given a MessageDef that was constructed with NewEmptyMessageDef(), adds
+// FieldDefs for all fields defined in the original message, but not for any
+// extensions or unknown fields.  The given MessageDef must not have any fields
+// that have the same name or number as any of the fields we are adding (the
+// easiest way to guarantee this is to start with an empty MessageDef).
+//
+// Note: declared void, so no success/failure result is returned to the caller.
+void AddAllFields(upb::MessageDef* md);
+
+// TODO(haberman): Add:
+// // Adds a handler that will store unknown fields in the UnknownFieldSet.
+// void AddUnknownFieldHandler(upb::MessageDef* md);
+
+// Returns a new upb::MessageDef that contains handlers for all fields, unknown
+// fields, and any extensions in the descriptor's pool.  The resulting
+// def/handlers should be equivalent to the generated code constructed by the
+// protobuf compiler (or the code in DynamicMessage) for the given type.
+// The subdefs for message/enum fields (if any) will be referenced symbolically,
+// and will need to be resolved before being finalized.
+//
+// TODO(haberman): Add missing support (LazyField, MessageSet, and extensions).
+//
+// TODO(haberman): possibly add a similar function that lets you supply a
+// separate DescriptorPool and MessageFactory for extensions, to support
+// proto2's io::CodedInputStream::SetExtensionRegistry().
+upb::MessageDef* NewFullMessageDef(const proto2::Message& m, void *owner);
+upb::MessageDef* NewFullMessageDef(const google::protobuf::Message& m,
+                                   void *owner);
+
+// Returns a new upb::EnumDef that corresponds to the given EnumDescriptor.
+// Caller owns a ref on the returned EnumDef.
+upb::EnumDef* NewEnumDef(const proto2::EnumDescriptor* desc, void *owner);
+upb::EnumDef* NewEnumDef(const google::protobuf::EnumDescriptor* desc,
+                         void *owner);
+
+// Finalized defs //////////////////////////////////////////////////////////////
+
+// These functions return FINALIZED defs, meaning that they are immutable and
+// ready for use.  Since they are immutable you cannot make any further changes
+// to eg. the set of fields, but these functions are more convenient if you
+// simply want to parse a message exactly how the built-in proto2 parser would.
+
+// Creates and returns a finalized MessageDef for the given message and its
+// entire type tree that will include all fields and unknown handlers (ie. it
+// will parse just like proto2 would).
+const upb::MessageDef* NewFinalMessageDef(const proto2::Message& m,
+                                          void *owner);
+const upb::MessageDef* NewFinalMessageDef(const google::protobuf::Message& m,
+                                          void *owner);
+
+}  // namespace proto2_bridge
+}  // namespace upb
+
+#endif
--- a/bindings/cpp/upb/upb.hpp
+++ b/bindings/cpp/upb/upb.hpp
@ -10,6 +10,16 @@
 #include "upb/upb.h"
 #include <iostream>

+// UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(class_name) expands to declarations of
+// class_name's default constructor and destructor.  When the compiler defines
+// __GXX_EXPERIMENTAL_CXX0X__ and UPB_NO_CXX11 is not defined, both are declared
+// "= delete"; otherwise they are only declared and no definition is provided
+// here.
+#if defined(__GXX_EXPERIMENTAL_CXX0X__) && !defined(UPB_NO_CXX11)
+#define UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(class_name) \
+  class_name() = delete; \
+  ~class_name() = delete;
+#else
+#define UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(class_name) \
+  class_name(); \
+  ~class_name();
+#endif
+
 namespace upb {

 typedef upb_success_t Success;
@ -31,11 +41,35 @@ class Status : public upb_status {
  void Clear() { upb_status_clear(this); }
 };

-class Value : public upb_value {
- public:
-  Value(const upb_value& val) { *this = val; }
-  Value() {}
-};
+typedef upb_value Value;
+
+template <typename T> T GetValue(Value v);
+template <typename T> Value MakeValue(T v);
+
+// Defines the explicit specializations GetValue<ctype>() and MakeValue<ctype>()
+// by forwarding to the C functions upb_value_get<type>() and
+// upb_value_<type>() respectively.
+#define UPB_VALUE_ACCESSORS(type, ctype) \
+  template <> inline ctype GetValue<ctype>(Value v) { \
+    return upb_value_get ## type(v); \
+  } \
+  template <> inline Value MakeValue<ctype>(ctype v) { \
+    return upb_value_ ## type(v); \
+  }
+
+// The macro expands to complete function definitions, so the uses below take
+// no trailing semicolon: a stray ';' at namespace scope is an empty
+// declaration, which C++03 rejects under -pedantic.
+UPB_VALUE_ACCESSORS(double, double)
+UPB_VALUE_ACCESSORS(float,  float)
+UPB_VALUE_ACCESSORS(int32,  int32_t)
+UPB_VALUE_ACCESSORS(int64,  int64_t)
+UPB_VALUE_ACCESSORS(uint32, uint32_t)
+UPB_VALUE_ACCESSORS(uint64, uint64_t)
+UPB_VALUE_ACCESSORS(bool,   bool)
+
+#undef UPB_VALUE_ACCESSORS
+
+template <typename T> inline T* GetPtrValue(Value v) {
+  return static_cast<T*>(upb_value_getptr(v));
+}
+template <typename T> inline Value MakePtrValue(T* v) {
+  return upb_value_ptr(static_cast<void*>(v));
+}

 INLINE std::ostream& operator<<(std::ostream& out, const Status& status) {
  out << status.GetString();
--- a/bindings/lua/upb.c
+++ b/bindings/lua/upb.c
@ -37,11 +37,15 @@ static uint32_t lupb_touint32(lua_State *L, int narg, const char *name) {
  return n;
 }

-static void lupb_pushstring(lua_State *L, const upb_byteregion *r) {
-  // TODO: could avoid a copy in the case that the string is contiguous.
-  char *str = upb_byteregion_strdup(r);
-  lua_pushlstring(L, str, upb_byteregion_len(r));
-  free(str);
+static void lupb_pushstring(lua_State *L, const upb_strref *ref) {
+  if (ref->ptr) {
+    lua_pushlstring(L, ref->ptr, ref->len);
+  } else {
+    // Lua requires a contiguous string; must copy+allocate.
+    char *str = upb_strref_dup(ref);
+    lua_pushlstring(L, str, ref->len);
+    free(str);
+  }
 }

 static void lupb_pushvalue(lua_State *L, upb_value val, upb_fielddef *f) {
@ -73,7 +77,7 @@ static void lupb_pushvalue(lua_State *L, upb_value val, upb_fielddef *f) {

 // Returns a scalar value (ie. not a submessage) as a upb_value.
 static upb_value lupb_getvalue(lua_State *L, int narg, upb_fielddef *f,
-                               upb_byteregion *ref) {
+                               upb_strref *ref) {
  assert(!upb_issubmsg(f));
  upb_value val;
  if (upb_fielddef_type(f) == UPB_TYPE(BOOL)) {
@ -135,7 +139,7 @@ static upb_value lupb_getvalue(lua_State *L, int narg, upb_fielddef *f,
 }

 static void lupb_typecheck(lua_State *L, int narg, upb_fielddef *f) {
-  upb_byteregion ref;
+  upb_strref ref;
  lupb_getvalue(L, narg, f, &ref);
 }

@ -298,8 +302,8 @@ static void lupb_fielddef_set(lua_State *L, upb_fielddef *f,
  } else if (streql(field, "default_value")) {
    if (!upb_fielddef_type(f))
      luaL_error(L, "Must set type before setting default_value");
-    upb_byteregion region;
-    upb_fielddef_setdefault(f, lupb_getvalue(L, narg, f, &region));
+    upb_strref ref;
+    upb_fielddef_setdefault(f, lupb_getvalue(L, narg, f, &ref));
  } else {
    luaL_error(L, "Cannot set fielddef member '%s'", field);
  }
@ -778,7 +782,7 @@ static upb_flow_t lupb_msg_string(void *m, upb_value fval, upb_value val,
  lua_State *L = *(lua_State**)m;
  int offset = array ? lua_rawlen(L, -1) : f->offset;
  if (!lua_checkstack(L, 1)) luaL_error(L, "stack full");
-  lupb_pushstring(L, upb_value_getbyteregion(val));
+  lupb_pushstring(L, upb_value_getstrref(val));
  lua_rawseti(L, -2, offset);
  return UPB_CONTINUE;
 }
--- a/bindings/python/upb.c
+++ b/bindings/python/upb.c
@ -612,9 +612,8 @@ static upb_sflow_t PyUpb_Message_StartRepeatedSubmessage(void *a, upb_value fval
 static upb_flow_t PyUpb_Message_StringValue(void *m, upb_value fval, upb_value val) {
  PyObject **str = PyUpb_Accessor_GetPtr(m, fval);
  if (*str) { Py_DECREF(*str); }
-  upb_byteregion *r = upb_value_getbyteregion(val);
-  *str = PyString_FromStringAndSize(NULL, upb_byteregion_len(r));
-  upb_byteregion_copyall(r, PyString_AsString(*str));
+  *str = PyString_FromStringAndSize(NULL, upb_value_getstrref(val)->len);
+  upb_strref_read(upb_value_getstrref(val), PyString_AsString(*str));
  upb_stdmsg_sethas(m, fval);
  return UPB_CONTINUE;
 }
@ -622,9 +621,8 @@ static upb_flow_t PyUpb_Message_StringValue(void *m, upb_value fval, upb_value v
 static upb_flow_t PyUpb_Message_AppendStringValue(void *a, upb_value fval, upb_value val) {
  (void)fval;
  PyObject **elem = upb_stdarray_append(a, sizeof(void*));
-  upb_byteregion *r = upb_value_getbyteregion(val);
-  *elem = PyString_FromStringAndSize(NULL, upb_byteregion_len(r));
-  upb_byteregion_copyall(r, PyString_AsString(*elem));
+  *elem = PyString_FromStringAndSize(NULL, upb_value_getstrref(val)->len);
+  upb_strref_read(upb_value_getstrref(val), PyString_AsString(*elem));
  return UPB_CONTINUE;
 }

--- a/tests/test.proto
+++ b/tests/test.proto
@ -1,14 +1,10 @@

 // A series of messages with various kinds of cycles in them.
-//      +-+---+    +---+
-//      V |   |    |   |
-// A -> B-+-> C -> D<--+
-// ^          |    |
-// +----------+----+
-//
-// This tests the following cases:
-//  - B and C are together in multiple cycles
-//  - B and D are cycles to themselves.
+//      +-+---+    +---+    +---+
+//      V |   |    V   |    V   |
+// A -> B-+-> C -> D---+--->E---+
+// ^          |`---|--------^
+// +----------+----+        F

 message A {
  optional B b = 1;
@ -23,11 +19,21 @@ message C {
  optional A a = 1;
  optional B b = 2;
  optional D d = 3;
+  optional E e = 4;
 }

 message D {
  optional A a = 1;
  optional D d = 2;
+  optional E e = 3;
+}
+
+message E {
+  optional E e = 1;
+}
+
+message F {
+  optional E e = 1;
 }

 // A proto with a bunch of simple primitives.
--- a/tests/test_cpp.cc
+++ b/tests/test_cpp.cc
@ -15,6 +15,7 @@
 #include "upb/upb.hpp"
 #include "upb/pb/decoder.hpp"
 #include "upb/pb/glue.hpp"
+#include "upb_test.h"

 static void TestSymbolTable(const char *descriptor_file) {
  upb::SymbolTable *s = upb::SymbolTable::New();
@ -23,20 +24,20 @@ static void TestSymbolTable(const char *descriptor_file) {
    std::cerr << "Couldn't load descriptor: " << status;
    exit(1);
  }
-  const upb::MessageDef *md = s->LookupMessage("A");
-  assert(md);
+  const upb::MessageDef *md = s->LookupMessage("A", &md);
+  ASSERT(md);

  s->Unref();
-  md->Unref();
+  md->Unref(&md);
 }

 static void TestByteStream() {
  upb::StringSource stringsrc;
  stringsrc.Reset("testing", 7);
  upb::ByteRegion* byteregion = stringsrc.AllBytes();
-  assert(byteregion->FetchAll() == UPB_BYTE_OK);
+  ASSERT(byteregion->FetchAll() == UPB_BYTE_OK);
  char* str = byteregion->StrDup();
-  assert(strcmp(str, "testing") == 0);
+  ASSERT(strcmp(str, "testing") == 0);
  free(str);
 }

--- a/tests/test_decoder.cc
+++ b/tests/test_decoder.cc
@ -21,6 +21,10 @@
 *   of submsg/sequences, etc.
 */

+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS  // For PRIu32, etc.
+#endif
+
 #include <inttypes.h>
 #include <stdarg.h>
 #include <stdint.h>
@ -32,95 +36,133 @@
 #include "upb/upb.h"
 #include "upb_test.h"

+// Copied from decoder.c, since this is not a public interface.
 typedef struct {
-  char *buf;
-  size_t len;
-} buffer;
+  uint8_t native_wire_type;
+  bool is_numeric;
+} upb_decoder_typeinfo;
+
+static const upb_decoder_typeinfo upb_decoder_types[] = {
+  {UPB_WIRE_TYPE_END_GROUP,   false},  // ENDGROUP
+  {UPB_WIRE_TYPE_64BIT,       true},   // DOUBLE
+  {UPB_WIRE_TYPE_32BIT,       true},   // FLOAT
+  {UPB_WIRE_TYPE_VARINT,      true},   // INT64
+  {UPB_WIRE_TYPE_VARINT,      true},   // UINT64
+  {UPB_WIRE_TYPE_VARINT,      true},   // INT32
+  {UPB_WIRE_TYPE_64BIT,       true},   // FIXED64
+  {UPB_WIRE_TYPE_32BIT,       true},   // FIXED32
+  {UPB_WIRE_TYPE_VARINT,      true},   // BOOL
+  {UPB_WIRE_TYPE_DELIMITED,   false},  // STRING
+  {UPB_WIRE_TYPE_START_GROUP, false},  // GROUP
+  {UPB_WIRE_TYPE_DELIMITED,   false},  // MESSAGE
+  {UPB_WIRE_TYPE_DELIMITED,   false},  // BYTES
+  {UPB_WIRE_TYPE_VARINT,      true},   // UINT32
+  {UPB_WIRE_TYPE_VARINT,      true},   // ENUM
+  {UPB_WIRE_TYPE_32BIT,       true},   // SFIXED32
+  {UPB_WIRE_TYPE_64BIT,       true},   // SFIXED64
+  {UPB_WIRE_TYPE_VARINT,      true},   // SINT32
+  {UPB_WIRE_TYPE_VARINT,      true},   // SINT64
+};
+
+
+class buffer {
+ public:
+  // Copies "len" bytes starting at "data".
+  buffer(const void *data, size_t len) : len_(0) { append(data, len); }
+  // Copies the NUL-terminated string "data" (without its terminator).
+  explicit buffer(const char *data) : len_(0) { append(data); }
+  // Creates a buffer holding "len" zero bytes.  The size is checked against
+  // the fixed capacity before writing, and one extra byte is cleared so that
+  // buf() is always NUL-terminated.
+  explicit buffer(size_t len) : len_(len) {
+    ASSERT_NOCOUNT(len < sizeof(buf_));
+    memset(buf_, 0, len + 1);
+  }
+  buffer(const buffer& buf) : len_(0) { append(buf); }
+  // Creates an empty buffer; buf() yields an empty string rather than
+  // uninitialized memory.
+  buffer() : len_(0) { buf_[0] = '\0'; }
+
+  // Appends "len" bytes from "data" to the end of the buffer and keeps the
+  // contents NUL-terminated.  Asserts that the result, including the
+  // terminator, fits in the fixed-size storage.
+  void append(const void *data, size_t len) {
+    ASSERT_NOCOUNT(len + len_ < sizeof(buf_));
+    memcpy(buf_ + len_, data, len);
+    len_ += len;
+    // '\0' rather than NULL: NULL is a null-pointer constant, and assigning it
+    // to a char draws a conversion warning.
+    buf_[len_] = '\0';
+  }

-// Mem is initialized to NULL.
-buffer *buffer_new(size_t len) {
-  buffer *buf = malloc(sizeof(*buf));
-  buf->buf = malloc(len);
-  buf->len = len;
-  memset(buf->buf, 0, buf->len);
-  return buf;
-}
+  void append(const buffer& buf) {
+    append(buf.buf_, buf.len_);
+  }

-buffer *buffer_new2(const void *data, size_t len) {
-  buffer *buf = buffer_new(len);
-  memcpy(buf->buf, data, len);
-  return buf;
-}
+  void append(const char *str) {
+    append(str, strlen(str));
+  }

-buffer *buffer_new3(const char *data) {
-  return buffer_new2(data, strlen(data));
-}
+  void vappendf(const char *fmt, va_list args) {
+    size_t avail = sizeof(buf_) - len_;
+    size_t size = vsnprintf(buf_ + len_, avail, fmt, args);
+    ASSERT_NOCOUNT(avail > size);
+    len_ += size;
+  }

-buffer *buffer_dup(buffer *buf) { return buffer_new2(buf->buf, buf->len); }
+  void appendf(const char *fmt, ...) {
+    va_list args;
+    va_start(args, fmt);
+    vappendf(fmt, args);
+    va_end(args);
+  }

-void buffer_free(buffer *buf) {
-  free(buf->buf);
-  free(buf);
-}
+  void assign(const buffer& buf) {
+    clear();
+    append(buf);
+  }

-void buffer_appendf(buffer *buf, const char *fmt, ...) {
-  va_list args;
-  va_start(args, fmt);
-  size_t size = buf->len;
-  buf->len += upb_vrprintf(&buf->buf, &size, buf->len, fmt, args);
-  va_end(args);
-}
+  bool eql(const buffer& other) const {
+    return len_ == other.len_ && memcmp(buf_, other.buf_, len_) == 0;
+  }

-void buffer_cat(buffer *buf, buffer *buf2) {
-  size_t newlen = buf->len + buf2->len;
-  buf->buf = realloc(buf->buf, newlen);
-  memcpy(buf->buf + buf->len, buf2->buf, buf2->len);
-  buf->len = newlen;
-  buffer_free(buf2);
-}
+  void clear() { len_ = 0; }
+  size_t len() const { return len_; }
+  const char *buf() const { return buf_; }

-bool buffer_eql(buffer *buf, buffer *buf2) {
-  return buf->len == buf2->len && memcmp(buf->buf, buf2->buf, buf->len) == 0;
-}
+ private:
+  // Has to be big enough for the largest string used in the test.
+  char buf_[32768];
+  size_t len_;
+};


 /* Routines for building arbitrary protos *************************************/

-buffer *cat(buffer *arg1, ...) {
-  va_list ap;
-  buffer *arg;
-  va_start(ap, arg1);
-  while ((arg = va_arg(ap, buffer*)) != NULL) {
-    buffer_cat(arg1, arg);
-  }
-  va_end(ap);
-  return arg1;
+const buffer empty;
+
+buffer cat(const buffer& a, const buffer& b,
+           const buffer& c = empty,
+           const buffer& d = empty,
+           const buffer& e = empty) {
+  buffer ret;
+  ret.append(a);
+  ret.append(b);
+  ret.append(c);
+  ret.append(d);
+  ret.append(e);
+  return ret;
 }

-buffer *varint(uint64_t x) {
-  buffer *buf = buffer_new(UPB_PB_VARINT_MAX_LEN + 1);
-  buf->len = upb_vencode64(x, buf->buf);
-  return buf;
+buffer varint(uint64_t x) {
+  char buf[UPB_PB_VARINT_MAX_LEN];
+  size_t len = upb_vencode64(x, buf);
+  return buffer(buf, len);
 }

 // TODO: proper byte-swapping for big-endian machines.
-buffer *fixed32(void *data) { return buffer_new2(data, 4); }
-buffer *fixed64(void *data) { return buffer_new2(data, 8); }
-
-buffer *delim(buffer *buf) { return cat( varint(buf->len), buf, NULL ); }
-buffer *uint32(uint32_t u32) { return fixed32(&u32); }
-buffer *uint64(uint64_t u64) { return fixed64(&u64); }
-buffer *flt(float f) { return fixed32(&f); }
-buffer *dbl(double d) { return fixed64(&d); }
-buffer *zz32(int32_t x) { return varint(upb_zzenc_32(x)); }
-buffer *zz64(int64_t x) { return varint(upb_zzenc_64(x)); }
-
-buffer *tag(uint32_t fieldnum, char wire_type) {
+buffer fixed32(void *data) { return buffer(data, 4); }
+buffer fixed64(void *data) { return buffer(data, 8); }
+
+buffer delim(const buffer& buf) { return cat(varint(buf.len()), buf); }
+buffer uint32(uint32_t u32) { return fixed32(&u32); }
+buffer uint64(uint64_t u64) { return fixed64(&u64); }
+buffer flt(float f) { return fixed32(&f); }
+buffer dbl(double d) { return fixed64(&d); }
+buffer zz32(int32_t x) { return varint(upb_zzenc_32(x)); }
+buffer zz64(int64_t x) { return varint(upb_zzenc_64(x)); }
+
+buffer tag(uint32_t fieldnum, char wire_type) {
  return varint((fieldnum << 3) | wire_type);
 }

-buffer *submsg(uint32_t fn, buffer *buf) {
-  return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), delim(buf), NULL );
+buffer submsg(uint32_t fn, const buffer& buf) {
+  return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), delim(buf) );
 }


@ -128,11 +170,26 @@ buffer *submsg(uint32_t fn, buffer *buf) {

 // The handlers simply append to a string indicating what handlers were called.
 // This string is similar to protobuf text format but fields are referred to by
-// number instead of name and sequences are explicitly delimited.
+// number instead of name and sequences are explicitly delimited.  We indent
+// using the closure depth to test that the stack of closures is properly
+// handled.
+
+int closures[UPB_MAX_NESTING];
+buffer output;
+
+void indentbuf(buffer *buf, int depth) {
+  for (int i = 0; i < depth; i++)
+    buf->append("  ", 2);
+}
+
+void indent(void *depth) {
+  indentbuf(&output, *(int*)depth);
+}

 #define VALUE_HANDLER(member, fmt) \
  upb_flow_t value_ ## member(void *closure, upb_value fval, upb_value val) { \
-    buffer_appendf(closure, "%" PRIu32 ":%" fmt "; ",                         \
+    indent(closure);                                                          \
+    output.appendf("%" PRIu32 ":%" fmt "\n",                                  \
                   upb_value_getuint32(fval), upb_value_get ## member(val));  \
    return UPB_CONTINUE;                                                      \
  }
@ -145,7 +202,8 @@ VALUE_HANDLER(float, "g")
 VALUE_HANDLER(double, "g")

 upb_flow_t value_bool(void *closure, upb_value fval, upb_value val) {
-  buffer_appendf(closure, "%" PRIu32 ":%s; ",
+  indent(closure);
+  output.appendf("%" PRIu32 ":%s\n",
                 upb_value_getuint32(fval),
                 upb_value_getbool(val) ? "true" : "false");
  return UPB_CONTINUE;
@ -153,34 +211,49 @@ upb_flow_t value_bool(void *closure, upb_value fval, upb_value val) {

 upb_flow_t value_string(void *closure, upb_value fval, upb_value val) {
  // Note: won't work with strings that contain NULL.
+  indent(closure);
  char *str = upb_byteregion_strdup(upb_value_getbyteregion(val));
-  buffer_appendf(closure, "%" PRIu32 ":%s; ", upb_value_getuint32(fval), str);
+  output.appendf("%" PRIu32 ":%s\n", upb_value_getuint32(fval), str);
  free(str);
  return UPB_CONTINUE;
 }

 upb_sflow_t startsubmsg(void *closure, upb_value fval) {
-  buffer_appendf(closure, "%" PRIu32 ":{ ", upb_value_getuint32(fval));
-  return UPB_CONTINUE_WITH(closure);
+  indent(closure);
+  output.appendf("%" PRIu32 ":{\n", upb_value_getuint32(fval));
+  return UPB_CONTINUE_WITH(((int*)closure) + 1);
 }

 upb_flow_t endsubmsg(void *closure, upb_value fval) {
-  (void)fval;
-  buffer_appendf(closure, "} ");
+  indent(closure);
+  output.append("}\n");
  return UPB_CONTINUE;
 }

 upb_sflow_t startseq(void *closure, upb_value fval) {
-  buffer_appendf(closure, "%" PRIu32 ":[ ", upb_value_getuint32(fval));
-  return UPB_CONTINUE_WITH(closure);
+  indent(closure);
+  output.appendf("%" PRIu32 ":[\n", upb_value_getuint32(fval));
+  return UPB_CONTINUE_WITH(((int*)closure) + 1);
 }

 upb_flow_t endseq(void *closure, upb_value fval) {
-  (void)fval;
-  buffer_appendf(closure, "] ");
+  indent(closure);
+  output.append("]\n");
  return UPB_CONTINUE;
 }

+upb_flow_t startmsg(void *closure) {
+  indent(closure);
+  output.append("<\n");
+  return UPB_CONTINUE;
+}
+
+void endmsg(void *closure, upb_status *status) {
+  (void)status;
+  indent(closure);
+  output.append(">\n");
+}
+
 void doreg(upb_mhandlers *m, uint32_t num, upb_fieldtype_t type, bool repeated,
           upb_value_handler *handler) {
  upb_fhandlers *f = upb_mhandlers_newfhandlers(m, num, type, repeated);
@ -221,6 +294,9 @@ void reg_subm(upb_mhandlers *m, uint32_t num, upb_fieldtype_t type,
 }

 void reghandlers(upb_mhandlers *m) {
+  upb_mhandlers_setstartmsg(m, &startmsg);
+  upb_mhandlers_setendmsg(m, &endmsg);
+
  // Register handlers for each type.
  reg(m, UPB_TYPE(DOUBLE),   &value_double);
  reg(m, UPB_TYPE(FLOAT),    &value_float);
@ -267,7 +343,7 @@ size_t upb_seamsrc_avail(const upb_seamsrc *src, size_t ofs) {
 }

 upb_bytesuccess_t upb_seamsrc_fetch(void *_src, uint64_t ofs, size_t *read) {
-  upb_seamsrc *src = _src;
+  upb_seamsrc *src = (upb_seamsrc*)_src;
  assert(ofs < src->len);
  if (ofs == src->len) {
    upb_status_seteof(&src->bytesrc.status);
@ -279,7 +355,7 @@ upb_bytesuccess_t upb_seamsrc_fetch(void *_src, uint64_t ofs, size_t *read) {

 void upb_seamsrc_copy(const void *_src, uint64_t ofs,
                      size_t len, char *dst) {
-  const upb_seamsrc *src = _src;
+  const upb_seamsrc *src = (const upb_seamsrc*)_src;
  assert(ofs + len <= src->len);
  memcpy(dst, src->str + ofs, len);
 }
@ -290,7 +366,7 @@ void upb_seamsrc_discard(void *src, uint64_t ofs) {
 }

 const char *upb_seamsrc_getptr(const void *_s, uint64_t ofs, size_t *len) {
-  const upb_seamsrc *src = _s;
+  const upb_seamsrc *src = (const upb_seamsrc*)_s;
  *len = upb_seamsrc_avail(src, ofs);
  return src->str + ofs;
 }
@ -314,7 +390,7 @@ void upb_seamsrc_init(upb_seamsrc *s, const char *str, size_t len) {
 }

 void upb_seamsrc_resetseams(upb_seamsrc *s, size_t seam1, size_t seam2) {
-  ASSERT(seam1 <= seam2);
+  assert(seam1 <= seam2);
  s->seam1 = seam1;
  s->seam2 = seam2;
  s->byteregion.discard = 0;
@ -337,83 +413,68 @@ upb_byteregion *upb_seamsrc_allbytes(upb_seamsrc *s) {
 /* Running of test cases ******************************************************/

 upb_decoderplan *plan;
-
-void run_decoder(buffer *proto, buffer *expected_output) {
+#define LINE(x) x "\n"
+void run_decoder(const buffer& proto, const buffer* expected_output) {
  upb_seamsrc src;
-  upb_seamsrc_init(&src, proto->buf, proto->len);
+  upb_seamsrc_init(&src, proto.buf(), proto.len());
  upb_decoder d;
  upb_decoder_init(&d);
  upb_decoder_resetplan(&d, plan, 0);
-  for (size_t i = 0; i < proto->len; i++) {
-    for (size_t j = i; j < proto->len; j++) {
+  for (size_t i = 0; i < proto.len(); i++) {
+    for (size_t j = i; j < UPB_MIN(proto.len(), i + 5); j++) {
      upb_seamsrc_resetseams(&src, i, j);
      upb_byteregion *input = upb_seamsrc_allbytes(&src);
-      buffer *output = buffer_new(0);
-      upb_decoder_resetinput(&d, input, output);
+      output.clear();
+      upb_decoder_resetinput(&d, input, &closures[0]);
      upb_success_t success = UPB_SUSPENDED;
      while (success == UPB_SUSPENDED)
        success = upb_decoder_decode(&d);
      ASSERT(upb_ok(upb_decoder_status(&d)) == (success == UPB_OK));
      if (expected_output) {
-        ASSERT(success == UPB_OK);
+        ASSERT_STATUS(success == UPB_OK, upb_decoder_status(&d));
        // The input should be fully consumed.
        ASSERT(upb_byteregion_fetchofs(input) == upb_byteregion_endofs(input));
        ASSERT(upb_byteregion_discardofs(input) ==
               upb_byteregion_endofs(input));
-        if (!buffer_eql(output, expected_output)) {
+        if (!output.eql(*expected_output)) {
          fprintf(stderr, "Text mismatch: '%s' vs '%s'\n",
-                  output->buf, expected_output->buf);
+                  output.buf(), expected_output->buf());
        }
-        ASSERT(strcmp(output->buf, expected_output->buf) == 0);
+        ASSERT(output.eql(*expected_output));
      } else {
        ASSERT(success == UPB_ERROR);
      }
-      buffer_free(output);
    }
  }
-  upb_seamsrc_uninit(&src);
  upb_decoder_uninit(&d);
-  buffer_free(proto);
-}
-
-void assert_successful_parse_at_eof(buffer *proto, const char *expected_fmt,
-                                    va_list args) {
-  buffer *expected_text = buffer_new(0);
-  size_t size = expected_text->len;
-  expected_text->len += upb_vrprintf(&expected_text->buf, &size,
-                                     expected_text->len, expected_fmt, args);
-  run_decoder(proto, expected_text);
-  buffer_free(expected_text);
+  upb_seamsrc_uninit(&src);
 }

-void assert_does_not_parse_at_eof(buffer *proto) {
-  run_decoder(proto, NULL);
-}
+const static buffer thirty_byte_nop = buffer(cat(
+    tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim(buffer(30)) ));

-void assert_successful_parse(buffer *proto, const char *expected_fmt, ...) {
-  // The JIT is only used for data >=20 bytes from end-of-buffer, so
-  // repeat once with no-op padding data at the end of buffer.
-  va_list args, args2;
+void assert_successful_parse(const buffer& proto,
+                             const char *expected_fmt, ...) {
+  buffer expected_text;
+  va_list args;
  va_start(args, expected_fmt);
-  va_copy(args2, args);
-  assert_successful_parse_at_eof(buffer_dup(proto), expected_fmt, args);
-  assert_successful_parse_at_eof(
-      cat( proto,
-           tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim(buffer_new(30)),
-           NULL ),
-      expected_fmt, args2);
+  expected_text.vappendf(expected_fmt, args);
  va_end(args);
-  va_end(args2);
+  // The JIT is only used for data >=20 bytes from end-of-buffer, so
+  // repeat once with no-op padding data at the end of buffer.
+  run_decoder(proto, &expected_text);
+  run_decoder(cat( proto, thirty_byte_nop ), &expected_text);
 }

-void assert_does_not_parse(buffer *proto) {
+void assert_does_not_parse_at_eof(const buffer& proto) {
+  run_decoder(proto, NULL);
+}
+
+void assert_does_not_parse(const buffer& proto) {
  // The JIT is only used for data >=20 bytes from end-of-buffer, so
  // repeat once with no-op padding data at the end of buffer.
-  assert_does_not_parse_at_eof(buffer_dup(proto));
-  assert_does_not_parse_at_eof(
-      cat( proto,
-           tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim( buffer_new(30)),
-           NULL ));
+  assert_does_not_parse_at_eof(proto);
+  assert_does_not_parse_at_eof(cat( proto, thirty_byte_nop ));
 }


@ -421,19 +482,19 @@ void assert_does_not_parse(buffer *proto) {

 void test_premature_eof_for_type(upb_fieldtype_t type) {
  // Incomplete values for each wire type.
-  static const char *incompletes[] = {
-    "\x80",    // UPB_WIRE_TYPE_VARINT
-    "abcdefg", // UPB_WIRE_TYPE_64BIT
-    "\x80",    // UPB_WIRE_TYPE_DELIMITED (partial length)
-    NULL,      // UPB_WIRE_TYPE_START_GROUP (no value required)
-    NULL,      // UPB_WIRE_TYPE_END_GROUP (no value required)
-    "abc"      // UPB_WIRE_TYPE_32BIT
+  static const buffer incompletes[6] = {
+    buffer("\x80"),     // UPB_WIRE_TYPE_VARINT
+    buffer("abcdefg"),  // UPB_WIRE_TYPE_64BIT
+    buffer("\x80"),     // UPB_WIRE_TYPE_DELIMITED (partial length)
+    buffer(),           // UPB_WIRE_TYPE_START_GROUP (no value required)
+    buffer(),           // UPB_WIRE_TYPE_END_GROUP (no value required)
+    buffer("abc")       // UPB_WIRE_TYPE_32BIT
  };

  uint32_t fieldnum = type;
  uint32_t rep_fieldnum = rep_fn(type);
-  int wire_type = upb_types[type].native_wire_type;
-  const char *incomplete = incompletes[wire_type];
+  int wire_type = upb_decoder_types[type].native_wire_type;
+  const buffer& incomplete = incompletes[wire_type];

  // EOF before a known non-repeated value.
  assert_does_not_parse_at_eof(tag(fieldnum, wire_type));
@ -446,108 +507,128 @@ void test_premature_eof_for_type(upb_fieldtype_t type) {

  // EOF inside a known non-repeated value.
  assert_does_not_parse_at_eof(
-      cat( tag(fieldnum, wire_type), buffer_new3(incomplete), NULL ));
+      cat( tag(fieldnum, wire_type), incomplete ));

  // EOF inside a known repeated value.
  assert_does_not_parse_at_eof(
-      cat( tag(rep_fieldnum, wire_type), buffer_new3(incomplete), NULL ));
+      cat( tag(rep_fieldnum, wire_type), incomplete ));

  // EOF inside an unknown value.
  assert_does_not_parse_at_eof(
-      cat( tag(UNKNOWN_FIELD, wire_type), buffer_new3(incomplete), NULL ));
+      cat( tag(UNKNOWN_FIELD, wire_type), incomplete ));

  if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
    // EOF in the middle of delimited data for known non-repeated value.
    assert_does_not_parse_at_eof(
-        cat( tag(fieldnum, wire_type), varint(1), NULL ));
+        cat( tag(fieldnum, wire_type), varint(1) ));

    // EOF in the middle of delimited data for known repeated value.
    assert_does_not_parse_at_eof(
-        cat( tag(rep_fieldnum, wire_type), varint(1), NULL ));
+        cat( tag(rep_fieldnum, wire_type), varint(1) ));

    // EOF in the middle of delimited data for unknown value.
    assert_does_not_parse_at_eof(
-        cat( tag(UNKNOWN_FIELD, wire_type), varint(1), NULL ));
+        cat( tag(UNKNOWN_FIELD, wire_type), varint(1) ));

    if (type == UPB_TYPE(MESSAGE)) {
      // Submessage ends in the middle of a value.
-      buffer *incomplete_submsg =
+      buffer incomplete_submsg =
          cat ( tag(UPB_TYPE(INT32), UPB_WIRE_TYPE_VARINT),
-                buffer_new3(incompletes[UPB_WIRE_TYPE_VARINT]), NULL );
+                incompletes[UPB_WIRE_TYPE_VARINT] );
      assert_does_not_parse(
          cat( tag(fieldnum, UPB_WIRE_TYPE_DELIMITED),
-               varint(incomplete_submsg->len),
-               incomplete_submsg, NULL ));
+               varint(incomplete_submsg.len()),
+               incomplete_submsg ));
    }
  } else {
    // Packed region ends in the middle of a value.
    assert_does_not_parse(
        cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
-             varint(strlen(incomplete)),
-             buffer_new3(incomplete), NULL ));
+             varint(incomplete.len()),
+             incomplete ));

    // EOF in the middle of packed region.
    assert_does_not_parse_at_eof(
-        cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED), varint(1), NULL ));
+        cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED), varint(1) ));
  }
 }

 // "33" and "66" are just two random values that all numeric types can
 // represent.
 void test_valid_data_for_type(upb_fieldtype_t type,
-                              buffer *enc33, buffer *enc66) {
+                              const buffer& enc33, const buffer& enc66) {
+  // Feeds the two encoded values to the decoder in three layouts (singular
+  // field, non-packed repeated field, packed repeated field) and checks the
+  // expected output text for each.  The singular field number is the numeric
+  // value of "type"; the repeated field number comes from rep_fn(type).
  uint32_t fieldnum = type;
  uint32_t rep_fieldnum = rep_fn(type);
-  int wire_type = upb_types[type].native_wire_type;
+  int wire_type = upb_decoder_types[type].native_wire_type;

  // Non-repeated
  assert_successful_parse(
-      cat( tag(fieldnum, wire_type), buffer_dup(enc33),
-           tag(fieldnum, wire_type), buffer_dup(enc66), NULL ),
-      "%u:33; %u:66; ", fieldnum, fieldnum);
+      cat( tag(fieldnum, wire_type), enc33,
+           tag(fieldnum, wire_type), enc66 ),
+      LINE("<")
+      LINE("%u:33")
+      LINE("%u:66")
+      LINE(">"), fieldnum, fieldnum);

  // Non-packed repeated.
  assert_successful_parse(
-      cat( tag(rep_fieldnum, wire_type), buffer_dup(enc33),
-           tag(rep_fieldnum, wire_type), buffer_dup(enc66), NULL ),
-      "%u:[ %u:33; %u:66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
+      cat( tag(rep_fieldnum, wire_type), enc33,
+           tag(rep_fieldnum, wire_type), enc66 ),
+      LINE("<")
+      LINE("%u:[")
+      LINE("  %u:33")
+      LINE("  %u:66")
+      LINE("]")
+      LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum);

  // Packed repeated.
  assert_successful_parse(
      cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
-           delim(cat( buffer_dup(enc33), buffer_dup(enc66), NULL )), NULL ),
-      "%u:[ %u:33; %u:66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
-
-  buffer_free(enc33);
-  buffer_free(enc66);
+           delim(cat( enc33, enc66 )) ),
+      LINE("<")
+      LINE("%u:[")
+      LINE("  %u:33")
+      LINE("  %u:66")
+      LINE("]")
+      LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum);
 }

 void test_valid_data_for_signed_type(upb_fieldtype_t type,
-                                     buffer *enc33, buffer *enc66) {
+                                     const buffer& enc33, const buffer& enc66) {
+  // Signed counterpart of test_valid_data_for_type(): the same three layouts
+  // (singular, non-packed repeated, packed repeated), but the expected text
+  // for the second value is -66.
+  // NOTE(review): despite its name, enc66 presumably carries an encoding of
+  // -66 -- confirm against the callers, which are outside this hunk.
  uint32_t fieldnum = type;
  uint32_t rep_fieldnum = rep_fn(type);
-  int wire_type = upb_types[type].native_wire_type;
+  int wire_type = upb_decoder_types[type].native_wire_type;

  // Non-repeated
  assert_successful_parse(
-      cat( tag(fieldnum, wire_type), buffer_dup(enc33),
-           tag(fieldnum, wire_type), buffer_dup(enc66), NULL ),
-      "%u:33; %u:-66; ", fieldnum, fieldnum);
+      cat( tag(fieldnum, wire_type), enc33,
+           tag(fieldnum, wire_type), enc66 ),
+      LINE("<")
+      LINE("%u:33")
+      LINE("%u:-66")
+      LINE(">"), fieldnum, fieldnum);

  // Non-packed repeated.
  assert_successful_parse(
-      cat( tag(rep_fieldnum, wire_type), buffer_dup(enc33),
-           tag(rep_fieldnum, wire_type), buffer_dup(enc66), NULL ),
-      "%u:[ %u:33; %u:-66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
+      cat( tag(rep_fieldnum, wire_type), enc33,
+           tag(rep_fieldnum, wire_type), enc66 ),
+      LINE("<")
+      LINE("%u:[")
+      LINE("  %u:33")
+      LINE("  %u:-66")
+      LINE("]")
+      LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum);

  // Packed repeated.
  assert_successful_parse(
      cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
-           delim(cat( buffer_dup(enc33), buffer_dup(enc66), NULL )), NULL ),
-      "%u:[ %u:33; %u:-66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
-
-  buffer_free(enc33);
-  buffer_free(enc66);
+           delim(cat( enc33, enc66 )) ),
+      LINE("<")
+      LINE("%u:[")
+      LINE("  %u:33")
+      LINE("  %u:-66")
+      LINE("]")
+      LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum);
 }

 // Test that invalid protobufs are properly detected (without crashing) and
@ -571,7 +652,7 @@ void test_invalid() {
  test_premature_eof_for_type(UPB_TYPE(SINT64));

  // EOF inside a tag's varint.
-  assert_does_not_parse_at_eof( buffer_new3("\x80") );
+  assert_does_not_parse_at_eof( buffer("\x80") );

  // EOF inside a known group.
  assert_does_not_parse_at_eof( tag(4, UPB_WIRE_TYPE_START_GROUP) );
@ -584,33 +665,19 @@ void test_invalid() {

  // Field number is 0.
  assert_does_not_parse(
-      cat( tag(0, UPB_WIRE_TYPE_DELIMITED), varint(0), NULL ));
+      cat( tag(0, UPB_WIRE_TYPE_DELIMITED), varint(0) ));

  // Field number is too large.
  assert_does_not_parse(
      cat( tag(UPB_MAX_FIELDNUMBER + 1, UPB_WIRE_TYPE_DELIMITED),
-           varint(0), NULL ));
+           varint(0) ));

  // Test exceeding the resource limit of stack depth.
-  buffer *buf = buffer_new3("");
+  buffer buf;
  for (int i = 0; i < UPB_MAX_NESTING; i++) {
-    buf = submsg(UPB_TYPE(MESSAGE), buf);
+    buf.assign(submsg(UPB_TYPE(MESSAGE), buf));
  }
  assert_does_not_parse(buf);
-
-  // Staying within the stack limit should work properly.
-  buf = buffer_new3("");
-  buffer *textbuf = buffer_new3("");
-  int total = UPB_MAX_NESTING - 1;
-  for (int i = 0; i < total; i++) {
-    buf = submsg(UPB_TYPE(MESSAGE), buf);
-    buffer_appendf(textbuf, "%u:{ ", UPB_TYPE(MESSAGE));
-  }
-  for (int i = 0; i < total; i++) {
-    buffer_appendf(textbuf, "} ");
-  }
-  assert_successful_parse(buf, "%s", textbuf->buf);
-  buffer_free(textbuf);
 }

 void test_valid() {
@ -629,16 +696,80 @@ void test_valid() {
  test_valid_data_for_type(UPB_TYPE(FIXED64), uint64(33), uint64(66));
  test_valid_data_for_type(UPB_TYPE(FIXED32), uint32(33), uint32(66));

+  // Test implicit startseq/endseq.
+  uint32_t repfl_fn = rep_fn(UPB_TYPE(FLOAT));
+  uint32_t repdb_fn = rep_fn(UPB_TYPE(DOUBLE));
+  assert_successful_parse(
+      cat( tag(repfl_fn, UPB_WIRE_TYPE_32BIT), flt(33),
+           tag(repdb_fn, UPB_WIRE_TYPE_64BIT), dbl(66) ),
+      LINE("<")
+      LINE("%u:[")
+      LINE("  %u:33")
+      LINE("]")
+      LINE("%u:[")
+      LINE("  %u:66")
+      LINE("]")
+      LINE(">"), repfl_fn, repfl_fn, repdb_fn, repdb_fn);
+
  // Submessage tests.
  uint32_t msg_fn = UPB_TYPE(MESSAGE);
  assert_successful_parse(
-      submsg(msg_fn, submsg(msg_fn, submsg(msg_fn, buffer_new3("")))),
-      "%u:{ %u:{ %u:{ } } } ", msg_fn, msg_fn, msg_fn);
+      submsg(msg_fn, submsg(msg_fn, submsg(msg_fn, buffer()))),
+      LINE("<")
+      LINE("%u:{")
+      LINE("  <")
+      LINE("  %u:{")
+      LINE("    <")
+      LINE("    %u:{")
+      LINE("      <")
+      LINE("      >")
+      LINE("    }")
+      LINE("    >")
+      LINE("  }")
+      LINE("  >")
+      LINE("}")
+      LINE(">"), msg_fn, msg_fn, msg_fn);

  uint32_t repm_fn = rep_fn(UPB_TYPE(MESSAGE));
  assert_successful_parse(
-      submsg(repm_fn, submsg(repm_fn, buffer_new3(""))),
-      "%u:[ %u:{ %u:[ %u:{ } ] } ] ", repm_fn, repm_fn, repm_fn, repm_fn);
+      submsg(repm_fn, submsg(repm_fn, buffer())),
+      LINE("<")
+      LINE("%u:[")
+      LINE("  %u:{")
+      LINE("    <")
+      LINE("    %u:[")
+      LINE("      %u:{")
+      LINE("        <")
+      LINE("        >")
+      LINE("      }")
+      LINE("    ]")
+      LINE("    >")
+      LINE("  }")
+      LINE("]")
+      LINE(">"), repm_fn, repm_fn, repm_fn, repm_fn);
+
+  // Staying within the stack limit should work properly.
+  buffer buf;
+  buffer textbuf;
+  int total = UPB_MAX_NESTING - 1;
+  for (int i = 0; i < total; i++) {
+    buf.assign(submsg(UPB_TYPE(MESSAGE), buf));
+    indentbuf(&textbuf, i);
+    textbuf.append("<\n");
+    indentbuf(&textbuf, i);
+    textbuf.appendf("%u:{\n", UPB_TYPE(MESSAGE));
+  }
+  indentbuf(&textbuf, total);
+  textbuf.append("<\n");
+  indentbuf(&textbuf, total);
+  textbuf.append(">\n");
+  for (int i = 0; i < total; i++) {
+    indentbuf(&textbuf, total - i - 1);
+    textbuf.append("}\n");
+    indentbuf(&textbuf, total - i - 1);
+    textbuf.append(">\n");
+  }
+  assert_successful_parse(buf, "%s", textbuf.buf());
 }

 void run_tests() {
@ -647,10 +778,17 @@ void run_tests() {
 }

 int main() {
+  for (int i = 0; i < UPB_MAX_NESTING; i++) {
+    closures[i] = i;
+  }
  // Construct decoder plan.
  upb_handlers *h = upb_handlers_new();
  reghandlers(upb_handlers_newmhandlers(h));

+  // Create an empty handlers to make sure that the decoder can handle empty
+  // messages.
+  upb_handlers_newmhandlers(h);
+
  // Test without JIT.
  plan = upb_decoderplan_new(h, false);
  run_tests();
@ -658,6 +796,11 @@ int main() {

  // Test JIT.
  plan = upb_decoderplan_new(h, true);
+#ifdef UPB_USE_JIT_X64
+  ASSERT(upb_decoderplan_hasjitcode(plan));
+#else
+  ASSERT(!upb_decoderplan_hasjitcode(plan));
+#endif
  run_tests();
  upb_decoderplan_unref(plan);

--- a/tests/test_def.c
+++ b/tests/test_def.c
@ -1,19 +1,174 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc.  See LICENSE for details.
+ *
+ * Test of defs and symtab.  There should be far more tests of edge conditions
+ * (like attempts to link defs that don't have required properties set).
+ */

-#undef NDEBUG  /* ensure tests always assert. */
 #include "upb/def.h"
+#include "upb/pb/glue.h"
+#include "upb_test.h"
 #include <stdlib.h>
+#include <string.h>

-int main() {
-  upb_symtab *s = upb_symtab_new();
+const char *descriptor_file;

-  // Will be empty atm since we haven't added anything to the symtab.
+// A newly created symtab must report zero defs when asked for defs of any
+// kind (UPB_DEF_ANY).  The returned array is released with free() and the
+// symtab is unref'd afterwards.
+static void test_empty_symtab() {
+  upb_symtab *s = upb_symtab_new();
  int count;
-  const upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY);
-  for (int i = 0; i < count; i++) {
-    upb_def_unref(defs[i]);
-  }
+  const upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY, NULL);
+  ASSERT(count == 0);
  free(defs);
+  upb_symtab_unref(s);
+}

+// Creates a symtab and loads into it the descriptor file named by the global
+// descriptor_file.  If loading fails, the error text is written to stderr and
+// the process exits with status 1; otherwise the symtab is returned.
+static upb_symtab *load_test_proto() {
+  upb_symtab *symtab = upb_symtab_new();
+  ASSERT(symtab);
+  upb_status load_status = UPB_STATUS_INIT;
+  if (upb_load_descriptor_file_into_symtab(
+          symtab, descriptor_file, &load_status)) {
+    // Loaded fine: release the status object and hand back the symtab.
+    upb_status_uninit(&load_status);
+    return symtab;
+  }
+  fprintf(stderr, "Error loading descriptor file: %s\n",
+          upb_status_getstr(&load_status));
+  exit(1);
+}
+
+// Refcounting test on message "A" from the test proto and the message "B"
+// reached through A's field number 1.
+// NOTE(review): the test proto itself is not visible here; presumably A and
+// B refer to each other, which is what makes the def "cyclic" -- confirm.
+static void test_cycles() {
+  upb_symtab *s = load_test_proto();
+
+  // Test cycle detection by making a cyclic def's main refcount go to zero
+  // and then be incremented to one again.
+  const upb_def *def = upb_symtab_lookup(s, "A", &def);
+  ASSERT(def);
+  ASSERT(upb_def_isfinalized(def));
  upb_symtab_unref(s);
+
+  // Message A has only one subfield: "optional B b = 1".
+  const upb_msgdef *m = upb_downcast_msgdef_const(def);
+  upb_fielddef *f = upb_msgdef_itof(m, 1);
+  ASSERT(f);
+  ASSERT(upb_hassubdef(f));
+  const upb_def *def2 = upb_fielddef_subdef(f);
+  ASSERT(upb_downcast_msgdef_const(def2));
+  ASSERT(strcmp(upb_def_fullname(def2), "B") == 0);
+
+  // Take a ref on B first, then release the ref on A obtained from the
+  // lookup above, and finally release the ref on B.
+  upb_def_ref(def2, &def2);
+  upb_def_unref(def, &def);
+  upb_def_unref(def2, &def2);
+}
+
+// Checks that a ref taken on a fielddef is enough to keep its msgdef usable
+// after both the symtab and the msgdef's own ref have been released.
+static void test_fielddef_unref() {
+  upb_symtab *s = load_test_proto();
+  const upb_msgdef *md = upb_symtab_lookupmsg(s, "A", &md);
+  // Field number 1 of message "A"; take a ref on it owned by &f.
+  upb_fielddef *f = upb_msgdef_itof(md, 1);
+  upb_fielddef_ref(f, &f);
+
+  // Unref symtab and msgdef; now fielddef is the only thing keeping the msgdef
+  // alive.
+  upb_symtab_unref(s);
+  upb_msgdef_unref(md, &md);
+  // Check that md is still alive.
+  ASSERT(strcmp(upb_def_fullname(UPB_UPCAST(md)), "A") == 0);
+
+  // Check that unref of fielddef frees the whole remaining graph.
+  upb_fielddef_unref(f, &f);
+}
+
+// Smoke test for the fielddef setters: two newly created fielddefs are
+// checked to be mutable, configured independently, and have their field
+// number read back.
+static void test_fielddef_accessors() {
+  upb_fielddef *fixed64_fld = upb_fielddef_new(&fixed64_fld);
+  upb_fielddef *bytes_fld = upb_fielddef_new(&bytes_fld);
+
+  // First field: repeated FIXED64 called "f1", number 1937.
+  ASSERT(upb_fielddef_ismutable(fixed64_fld));
+  upb_fielddef_setname(fixed64_fld, "f1");
+  upb_fielddef_setnumber(fixed64_fld, 1937);
+  upb_fielddef_settype(fixed64_fld, UPB_TYPE(FIXED64));
+  upb_fielddef_setlabel(fixed64_fld, UPB_LABEL(REPEATED));
+  ASSERT(upb_fielddef_number(fixed64_fld) == 1937);
+
+  // Second field: repeated BYTES called "f2", number 1572.
+  ASSERT(upb_fielddef_ismutable(bytes_fld));
+  upb_fielddef_setname(bytes_fld, "f2");
+  upb_fielddef_setnumber(bytes_fld, 1572);
+  upb_fielddef_settype(bytes_fld, UPB_TYPE(BYTES));
+  upb_fielddef_setlabel(bytes_fld, UPB_LABEL(REPEATED));
+  ASSERT(upb_fielddef_number(bytes_fld) == 1572);
+
+  // Release both fields using the same owner pointers they were created with.
+  upb_fielddef_unref(fixed64_fld, &fixed64_fld);
+  upb_fielddef_unref(bytes_fld, &bytes_fld);
+}
+
+// Convenience constructor: creates a fielddef on behalf of "owner", then
+// applies the name, number, type, label and subtype name, in that order.
+static upb_fielddef *newfield(
+    const char *name, int32_t num, uint8_t type, uint8_t label,
+    const char *type_name, void *owner) {
+  upb_fielddef *fld = upb_fielddef_new(owner);
+  upb_fielddef_setname(fld, name);
+  upb_fielddef_setnumber(fld, num);
+  upb_fielddef_settype(fld, type);
+  upb_fielddef_setlabel(fld, label);
+  upb_fielddef_setsubtypename(fld, type_name);
+  return fld;
+}
+
+// Creates a msgdef on behalf of "owner" and sets its full name to "name".
+static upb_msgdef *upb_msgdef_newnamed(const char *name, void *owner) {
+  upb_msgdef *msgdef = upb_msgdef_new(owner);
+  upb_def_setfullname(UPB_UPCAST(msgdef), name);
+  return msgdef;
+}
+
+// Creates an enumdef on behalf of "owner" and sets its full name to "name".
+INLINE upb_enumdef *upb_enumdef_newnamed(const char *name, void *owner) {
+  upb_enumdef *enumdef = upb_enumdef_new(owner);
+  upb_def_setfullname(UPB_UPCAST(enumdef), name);
+  return enumdef;
+}
+
+// Checks which defs the symtab replaces when a def is re-added.  MyMessage
+// has a field of type ".MyEnum"; MyMessage2 has no fields.  After MyEnum is
+// added a second time, MyMessage is expected to be a new object while
+// MyMessage2 is expected to be the very same object as before.
+void test_replacement() {
+  upb_symtab *s = upb_symtab_new();
+
+  upb_msgdef *m = upb_msgdef_newnamed("MyMessage", &s);
+  upb_msgdef_addfield(m, newfield(
+      "field1", 1, UPB_TYPE(ENUM), UPB_LABEL(OPTIONAL), ".MyEnum", &s), &s);
+  upb_msgdef *m2 = upb_msgdef_newnamed("MyMessage2", &s);
+  upb_enumdef *e = upb_enumdef_newnamed("MyEnum", &s);
+
+  upb_def *newdefs[] = {UPB_UPCAST(m), UPB_UPCAST(m2), UPB_UPCAST(e)};
+  upb_status status = UPB_STATUS_INIT;
+  ASSERT_STATUS(upb_symtab_add(s, newdefs, 3, &s, &status), &status);
+
+  // Try adding a new definition of MyEnum, MyMessage should get replaced with
+  // a new version.
+  upb_enumdef *e2 = upb_enumdef_newnamed("MyEnum", &s);
+  upb_def *newdefs2[] = {UPB_UPCAST(e2)};
+  ASSERT_STATUS(upb_symtab_add(s, newdefs2, 1, &s, &status), &status);
+
+  const upb_msgdef *m3 = upb_symtab_lookupmsg(s, "MyMessage", &m3);
+  ASSERT(m3);
+  // Must be different because it points to MyEnum which was replaced.
+  ASSERT(m3 != m);
+  upb_msgdef_unref(m3, &m3);
+
+  m3 = upb_symtab_lookupmsg(s, "MyMessage2", &m3);
+  // Should be the same because it was not replaced, nor were any defs that
+  // are reachable from it.
+  ASSERT(m3 == m2);
+  upb_msgdef_unref(m3, &m3);
+
+  // Release the status object, as load_test_proto() does for its own status.
+  upb_status_uninit(&status);
+  upb_symtab_unref(s);
+}
+
+// Entry point.  Expects the descriptor file path (test.proto.pb, per the
+// usage message) as argv[1], stores it in the descriptor_file global, and
+// then runs each test function in turn.  Returns 1 if the argument is
+// missing, 0 otherwise.
+int main(int argc, char *argv[]) {
+  if (argc < 2) {
+    fprintf(stderr, "Usage: test_def <test.proto.pb>\n");
+    return 1;
+  }
+  descriptor_file = argv[1];
+  test_empty_symtab();
+  test_cycles();
+  test_fielddef_accessors();
+  test_fielddef_unref();
+  test_replacement();
  return 0;
 }
--- a/tests/test_table.cc
+++ b/tests/test_table.cc
@ -1,8 +1,11 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2012 Google Inc.  See LICENSE for details.
+ *
+ * Tests for upb_table.
+ */

-#undef NDEBUG  /* ensure tests always assert. */
-#include "upb/table.h"
-#include "test_util.h"
-#include <assert.h>
 #include <string.h>
 #include <sys/resource.h>
 #include <ext/hash_map>
@ -11,55 +14,45 @@
 #include <set>
 #include <string>
 #include <vector>
+#include "tests/test_util.h"
+#include "tests/upb_test.h"
+#include "upb/table.h"

 bool benchmark = false;
 #define CPU_TIME_PER_TEST 0.5

 using std::vector;

-typedef struct {
-  uint32_t value;  /* key*2 */
-} inttable_entry;
-
-typedef struct {
-  int32_t value;  /* ASCII Value of first letter */
-} strtable_entry;
-
-double get_usertime()
-{
+// Returns the user CPU time reported by getrusage(RUSAGE_SELF) as a double
+// number of seconds (whole seconds plus microseconds / 1e6).
+double get_usertime() {
  struct rusage usage;
  getrusage(RUSAGE_SELF, &usage);
  return usage.ru_utime.tv_sec + (usage.ru_utime.tv_usec/1000000.0);
 }

 /* num_entries must be a power of 2. */
-void test_strtable(const vector<std::string>& keys, uint32_t num_to_insert)
-{
+void test_strtable(const vector<std::string>& keys, uint32_t num_to_insert) {
  /* Initialize structures. */
  upb_strtable table;
  std::map<std::string, int32_t> m;
-  upb_strtable_init(&table, 0, sizeof(strtable_entry));
+  upb_strtable_init(&table);
  std::set<std::string> all;
  for(size_t i = 0; i < num_to_insert; i++) {
    const std::string& key = keys[i];
    all.insert(key);
-    strtable_entry e;
-    e.value = key[0];
-    upb_strtable_insert(&table, key.c_str(), &e);
+    upb_strtable_insert(&table, key.c_str(), upb_value_int32(key[0]));
    m[key] = key[0];
  }

  /* Test correctness. */
  for(uint32_t i = 0; i < keys.size(); i++) {
    const std::string& key = keys[i];
-    strtable_entry *e =
-        (strtable_entry*)upb_strtable_lookup(&table, key.c_str());
+    const upb_value *v = upb_strtable_lookup(&table, key.c_str());
    if(m.find(key) != m.end()) { /* Assume map implementation is correct. */
-      assert(e);
-      assert(e->value == key[0]);
-      assert(m[key] == key[0]);
+      ASSERT(v);
+      ASSERT(upb_value_getint32(*v) == key[0]);
+      ASSERT(m[key] == key[0]);
    } else {
-      assert(e == NULL);
+      ASSERT(v == NULL);
    }
  }

@ -69,66 +62,83 @@ void test_strtable(const vector<std::string>& keys, uint32_t num_to_insert)
    const char *key = upb_strtable_iter_key(&iter);
    std::string tmp(key, strlen(key));
    std::set<std::string>::iterator i = all.find(tmp);
-    assert(i != all.end());
+    ASSERT(i != all.end());
    all.erase(i);
  }
-  assert(all.empty());
+  ASSERT(all.empty());

-  upb_strtable_free(&table);
+  upb_strtable_uninit(&table);
 }

 /* num_entries must be a power of 2. */
-void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc)
-{
+void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) {
  /* Initialize structures. */
  upb_inttable table;
  uint32_t largest_key = 0;
  std::map<uint32_t, uint32_t> m;
  __gnu_cxx::hash_map<uint32_t, uint32_t> hm;
-  upb_inttable_init(&table, num_entries, sizeof(inttable_entry));
+  upb_inttable_init(&table);
  for(size_t i = 0; i < num_entries; i++) {
    int32_t key = keys[i];
    largest_key = UPB_MAX((int32_t)largest_key, key);
-    inttable_entry e;
-    e.value = (key*2) << 1;
-    upb_inttable_insert(&table, key, &e);
+    upb_inttable_insert(&table, key, upb_value_uint32(key * 2));
    m[key] = key*2;
    hm[key] = key*2;
  }

  /* Test correctness. */
  for(uint32_t i = 0; i <= largest_key; i++) {
-    inttable_entry *e = (inttable_entry*)upb_inttable_lookup(
-        &table, i);
+    const upb_value *v = upb_inttable_lookup(&table, i);
+    if(m.find(i) != m.end()) { /* Assume map implementation is correct. */
+      ASSERT(v);
+      ASSERT(upb_value_getuint32(*v) == i*2);
+      ASSERT(m[i] == i*2);
+      ASSERT(hm[i] == i*2);
+    } else {
+      ASSERT(v == NULL);
+    }
+  }
+
+  for(uint16_t i = 0; i < num_entries; i += 2) {
+    upb_value val;
+    bool ret = upb_inttable_remove(&table, keys[i], &val);
+    ASSERT(ret == (m.erase(keys[i]) == 1));
+    if (ret) ASSERT(upb_value_getuint32(val) == keys[i] * 2);
+    hm.erase(keys[i]);
+    m.erase(keys[i]);
+  }
+
+  ASSERT(upb_inttable_count(&table) == hm.size());
+
+  /* Test correctness. */
+  for(uint32_t i = 0; i <= largest_key; i++) {
+    const upb_value *v = upb_inttable_lookup(&table, i);
    if(m.find(i) != m.end()) { /* Assume map implementation is correct. */
-      assert(e);
-      //printf("addr: %p, expected: %d, actual: %d\n", e, i*2, e->value);
-      assert(((e->value) >> 1) == i*2);
-      assert(m[i] == i*2);
-      assert(hm[i] == i*2);
+      ASSERT(v);
+      ASSERT(upb_value_getuint32(*v) == i*2);
+      ASSERT(m[i] == i*2);
+      ASSERT(hm[i] == i*2);
    } else {
-      assert(e == NULL);
+      ASSERT(v == NULL);
    }
  }

  // Compact and test correctness again.
  upb_inttable_compact(&table);
  for(uint32_t i = 0; i <= largest_key; i++) {
-    inttable_entry *e = (inttable_entry*)upb_inttable_lookup(
-        &table, i);
+    const upb_value *v = upb_inttable_lookup(&table, i);
    if(m.find(i) != m.end()) { /* Assume map implementation is correct. */
-      assert(e);
-      //printf("addr: %p, expected: %d, actual: %d\n", e, i*2, e->value);
-      assert(((e->value) >> 1) == i*2);
-      assert(m[i] == i*2);
-      assert(hm[i] == i*2);
+      ASSERT(v);
+      ASSERT(upb_value_getuint32(*v) == i*2);
+      ASSERT(m[i] == i*2);
+      ASSERT(hm[i] == i*2);
    } else {
-      assert(e == NULL);
+      ASSERT(v == NULL);
    }
  }

  if(!benchmark) {
-    upb_inttable_free(&table);
+    upb_inttable_uninit(&table);
    return;
  }

@ -141,7 +151,7 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc)
  }
  for(uint16_t i = num_entries - 1; i >= 1; i--) {
    uint16_t rand_i = (random() / (double)RAND_MAX) * i;
-    assert(rand_i <= i);
+    ASSERT(rand_i <= i);
    uint16_t tmp = rand_order[rand_i];
    rand_order[rand_i] = rand_order[i];
    rand_order[i] = tmp;
@ -162,8 +172,8 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc)
  for(i = 0; true; i++) {
    MAYBE_BREAK;
    int32_t key = keys[i & mask];
-    inttable_entry *e = (inttable_entry*)upb_inttable_lookup(&table, key);
-    x += (uintptr_t)e;
+    const upb_value *v = upb_inttable_lookup32(&table, key);
+    x += (uintptr_t)v;
  }
  double total = get_usertime() - before;
  printf("%s/s\n", eng(i/total, 3, false));
@ -174,8 +184,8 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc)
  for(i = 0; true; i++) {
    MAYBE_BREAK;
    int32_t key = keys[rand_order[i & mask]];
-    inttable_entry *e = (inttable_entry*)upb_inttable_lookup(&table, key);
-    x += (uintptr_t)e;
+    const upb_value *v = upb_inttable_lookup32(&table, key);
+    x += (uintptr_t)v;
  }
  total = get_usertime() - before;
  printf("%s/s\n", eng(i/total, 3, false));
@ -223,20 +233,18 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc)
  }
  total = get_usertime() - before;
  printf("%s/s\n\n", eng(i/total, 3, false));
-  upb_inttable_free(&table);
+  upb_inttable_uninit(&table);
  delete rand_order;
 }

-int32_t *get_contiguous_keys(int32_t num)
-{
+// Returns a new[]-allocated array of "num" keys holding 0, 1, ..., num-1.
+// The array is handed to the caller as a raw pointer (presumably to be
+// released with delete[] -- the call sites are outside this hunk).
+int32_t *get_contiguous_keys(int32_t num) {
  int32_t *buf = new int32_t[num];
  for(int32_t i = 0; i < num; i++)
-    buf[i] = i+1;
+    buf[i] = i;
  return buf;
 }

-int main(int argc, char *argv[])
-{
+int main(int argc, char *argv[]) {
  for (int i = 1; i < argc; i++) {
    if (strcmp(argv[i], "--benchmark") == 0) benchmark = true;
  }
--- a/tests/test_vs_proto2.cc
+++ b/tests/test_vs_proto2.cc
@ -1,7 +1,7 @@
 /*
 * upb - a minimalist implementation of protocol buffers.
 *
- * Copyright (c) 2011 Google Inc.  See LICENSE for details.
+ * Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
 *
 * A test that verifies that our results are identical to proto2 for a
 * given proto type and input protobuf.
@ -9,230 +9,87 @@

 #define __STDC_LIMIT_MACROS  // So we get UINT32_MAX
 #include <assert.h>
+#include <google/protobuf/descriptor.h>
+#include <google/protobuf/wire_format_lite.h>
 #include <inttypes.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <google/protobuf/descriptor.h>
-#include <google/protobuf/wire_format_lite.h>
 #include "benchmarks/google_messages.pb.h"
-#include "upb/def.h"
-#include "upb/msg.h"
+#include "upb/def.hpp"
+#include "upb/handlers.hpp"
+#include "upb/msg.hpp"
+#include "upb/pb/decoder.hpp"
 #include "upb/pb/glue.h"
 #include "upb/pb/varint.h"
+#include "upb/proto2_bridge.hpp"
 #include "upb_test.h"

-size_t string_size;
-
-void compare(const google::protobuf::Message& proto2_msg,
-             void *upb_msg, const upb_msgdef *upb_md);
-
-void compare_arrays(const google::protobuf::Reflection *r,
-                    const google::protobuf::Message& proto2_msg,
-                    const google::protobuf::FieldDescriptor *proto2_f,
-                    void *upb_msg, upb_fielddef *upb_f)
-{
-  ASSERT(upb_msg_has(upb_msg, upb_f));
-  ASSERT(upb_isseq(upb_f));
-  const void *arr = upb_value_getptr(upb_msg_getseq(upb_msg, upb_f));
-  const void *iter = upb_seq_begin(arr, upb_f);
-  for(int i = 0;
-      i < r->FieldSize(proto2_msg, proto2_f);
-      i++, iter = upb_seq_next(arr, iter, upb_f)) {
-    ASSERT(!upb_seq_done(iter));
-    upb_value v = upb_seq_get(iter, upb_f);
-    switch(upb_f->type) {
-      default:
-        ASSERT(false);
-      case UPB_TYPE(DOUBLE):
-        ASSERT(r->GetRepeatedDouble(proto2_msg, proto2_f, i) == upb_value_getdouble(v));
-        break;
-      case UPB_TYPE(FLOAT):
-        ASSERT(r->GetRepeatedFloat(proto2_msg, proto2_f, i) == upb_value_getfloat(v));
-        break;
-      case UPB_TYPE(INT64):
-      case UPB_TYPE(SINT64):
-      case UPB_TYPE(SFIXED64):
-        ASSERT(r->GetRepeatedInt64(proto2_msg, proto2_f, i) == upb_value_getint64(v));
-        break;
-      case UPB_TYPE(UINT64):
-      case UPB_TYPE(FIXED64):
-        ASSERT(r->GetRepeatedUInt64(proto2_msg, proto2_f, i) == upb_value_getuint64(v));
-        break;
-      case UPB_TYPE(SFIXED32):
-      case UPB_TYPE(SINT32):
-      case UPB_TYPE(INT32):
-      case UPB_TYPE(ENUM):
-        ASSERT(r->GetRepeatedInt32(proto2_msg, proto2_f, i) == upb_value_getint32(v));
-        break;
-      case UPB_TYPE(FIXED32):
-      case UPB_TYPE(UINT32):
-        ASSERT(r->GetRepeatedUInt32(proto2_msg, proto2_f, i) == upb_value_getuint32(v));
-        break;
-      case UPB_TYPE(BOOL):
-        ASSERT(r->GetRepeatedBool(proto2_msg, proto2_f, i) == upb_value_getbool(v));
-        break;
-      case UPB_TYPE(STRING):
-      case UPB_TYPE(BYTES): {
-        std::string str = r->GetRepeatedString(proto2_msg, proto2_f, i);
-        upb_stdarray *upbstr = (upb_stdarray*)upb_value_getptr(v);
-        std::string str2(upbstr->ptr, upbstr->len);
-        string_size += upbstr->len;
-        ASSERT(str == str2);
-        break;
-      }
-      case UPB_TYPE(GROUP):
-      case UPB_TYPE(MESSAGE):
-        ASSERT(upb_dyncast_msgdef(upb_f->def) != NULL);
-        compare(r->GetRepeatedMessage(proto2_msg, proto2_f, i),
-                upb_value_getptr(v), upb_downcast_msgdef(upb_f->def));
-    }
-  }
-  ASSERT(upb_seq_done(iter));
-}
-
-void compare_values(const google::protobuf::Reflection *r,
-                    const google::protobuf::Message& proto2_msg,
-                    const google::protobuf::FieldDescriptor *proto2_f,
-                    void *upb_msg, upb_fielddef *upb_f)
-{
-  upb_value v = upb_msg_get(upb_msg, upb_f);
-  switch(upb_f->type) {
-    default:
-      ASSERT(false);
-    case UPB_TYPE(DOUBLE):
-      ASSERT(r->GetDouble(proto2_msg, proto2_f) == upb_value_getdouble(v));
-      break;
-    case UPB_TYPE(FLOAT):
-      ASSERT(r->GetFloat(proto2_msg, proto2_f) == upb_value_getfloat(v));
-      break;
-    case UPB_TYPE(INT64):
-    case UPB_TYPE(SINT64):
-    case UPB_TYPE(SFIXED64):
-      ASSERT(r->GetInt64(proto2_msg, proto2_f) == upb_value_getint64(v));
-      break;
-    case UPB_TYPE(UINT64):
-    case UPB_TYPE(FIXED64):
-      ASSERT(r->GetUInt64(proto2_msg, proto2_f) == upb_value_getuint64(v));
-      break;
-    case UPB_TYPE(SFIXED32):
-    case UPB_TYPE(SINT32):
-    case UPB_TYPE(INT32):
-    case UPB_TYPE(ENUM):
-      ASSERT(r->GetInt32(proto2_msg, proto2_f) == upb_value_getint32(v));
-      break;
-    case UPB_TYPE(FIXED32):
-    case UPB_TYPE(UINT32):
-      ASSERT(r->GetUInt32(proto2_msg, proto2_f) == upb_value_getuint32(v));
-      break;
-    case UPB_TYPE(BOOL):
-      ASSERT(r->GetBool(proto2_msg, proto2_f) == upb_value_getbool(v));
-      break;
-    case UPB_TYPE(STRING):
-    case UPB_TYPE(BYTES): {
-      std::string str = r->GetString(proto2_msg, proto2_f);
-      upb_stdarray *upbstr = (upb_stdarray*)upb_value_getptr(v);
-      std::string str2(upbstr->ptr, upbstr->len);
-      string_size += upbstr->len;
-      ASSERT(str == str2);
-      break;
-    }
-    case UPB_TYPE(GROUP):
-    case UPB_TYPE(MESSAGE):
-      // XXX: getstr
-      compare(r->GetMessage(proto2_msg, proto2_f),
-              upb_value_getptr(v), upb_downcast_msgdef(upb_f->def));
-  }
-}
-
-void compare(const google::protobuf::Message& proto2_msg,
-             void *upb_msg, const upb_msgdef *upb_md)
-{
-  const google::protobuf::Reflection *r = proto2_msg.GetReflection();
-  const google::protobuf::Descriptor *d = proto2_msg.GetDescriptor();
-
-  ASSERT(d->field_count() == upb_msgdef_numfields(upb_md));
-  upb_msg_iter i;
-  for(i = upb_msg_begin(upb_md); !upb_msg_done(i); i = upb_msg_next(upb_md, i)) {
-    upb_fielddef *upb_f = upb_msg_iter_field(i);
+// Asserts that the upb message definition "upb_md" describes the same fields
+// as the proto2 descriptor "d": equal field counts and, for every upb field,
+// a proto2 field with the same number whose name, type and repeatedness
+// match.
+void compare_metadata(const google::protobuf::Descriptor* d,
+                      const upb::MessageDef *upb_md) {
+  ASSERT(d->field_count() == upb_md->field_count());
+  for (upb::MessageDef::ConstIterator i(upb_md); !i.Done(); i.Next()) {
+    const upb::FieldDef* upb_f = i.field();
+    // Check the pointer before its first dereference just below.
+    ASSERT(upb_f);
    const google::protobuf::FieldDescriptor *proto2_f =
-        d->FindFieldByNumber(upb_f->number);
-    // Make sure the definitions are equal.
-    ASSERT(upb_f);
+        d->FindFieldByNumber(upb_f->number());
    ASSERT(proto2_f);
-    ASSERT(upb_f->number == proto2_f->number());
-    ASSERT(std::string(upb_f->name) == proto2_f->name());
-    ASSERT(upb_f->type == proto2_f->type());
-    ASSERT(upb_isseq(upb_f) == proto2_f->is_repeated());
-
-    if(!upb_msg_has(upb_msg, upb_f)) {
-      if(upb_isseq(upb_f))
-        ASSERT(r->FieldSize(proto2_msg, proto2_f) == 0);
-      else
-        ASSERT(r->HasField(proto2_msg, proto2_f) == false);
-    } else {
-      if(upb_isseq(upb_f)) {
-        compare_arrays(r, proto2_msg, proto2_f, upb_msg, upb_f);
-      } else {
-        ASSERT(r->HasField(proto2_msg, proto2_f) == true);
-        compare_values(r, proto2_msg, proto2_f, upb_msg, upb_f);
-      }
-    }
+    ASSERT(upb_f->number() == proto2_f->number());
+    ASSERT(std::string(upb_f->name()) == proto2_f->name());
+    ASSERT(upb_f->type() == static_cast<upb::FieldType>(proto2_f->type()));
+    ASSERT(upb_f->IsSequence() == proto2_f->is_repeated());
  }
 }

-void parse_and_compare(MESSAGE_CIDENT *proto2_msg,
-                       void *upb_msg, const upb_msgdef *upb_md,
-                       const char *str, size_t len, bool allow_jit)
-{
+// Parses the "len" bytes at "str" twice -- into *msg1 with proto2's own
+// ParseFromArray() and into *msg2 through a upb decoder built from upb_md --
+// then asserts that both messages serialize to the same bytes and that those
+// bytes equal the input.  allow_jit is forwarded to upb::DecoderPlan::New().
+void parse_and_compare(MESSAGE_CIDENT *msg1, MESSAGE_CIDENT *msg2,
+                       const upb::MessageDef *upb_md,
+                       const char *str, size_t len, bool allow_jit) {
  // Parse to both proto2 and upb.
-  ASSERT(proto2_msg->ParseFromArray(str, len));
-  upb_status status = UPB_STATUS_INIT;
-  upb_msg_clear(upb_msg, upb_md);
-  upb_strtomsg(str, len, upb_msg, upb_md, allow_jit, &status);
-  if (!upb_ok(&status)) {
-    fprintf(stderr, "Error parsing protobuf: %s", upb_status_getstr(&status));
-    exit(1);
-  }
-  string_size = 0;
-  compare(*proto2_msg, upb_msg, upb_md);
-  printf("Total size: %zd, string size: %zd (%0.2f%%)\n", len,
-         string_size, (double)string_size / len * 100);
-  upb_status_uninit(&status);
+  ASSERT(msg1->ParseFromArray(str, len));
+
+  // Fresh handlers and decoder plan are built on every call and released
+  // again once decoding has finished.
+  upb::Handlers* handlers = upb::Handlers::New();
+  upb::RegisterWriteHandlers(handlers, upb_md);
+  upb::DecoderPlan* plan = upb::DecoderPlan::New(handlers, allow_jit);
+  upb::StringSource src(str, len);
+  upb::Decoder decoder;
+  decoder.ResetPlan(plan, 0);
+  decoder.ResetInput(src.AllBytes(), msg2);
+  // msg2 is reused across calls, so wipe it before decoding into it.
+  msg2->Clear();
+  ASSERT(decoder.Decode() == UPB_OK);
+  plan->Unref();
+  handlers->Unref();
+
+  // Would like to just compare the message objects themselves,  but
+  // unfortunately MessageDifferencer is not part of the open-source release of
+  // proto2, so we compare their serialized strings, which we expect will be
+  // equivalent.
+  std::string str1;
+  std::string str2;
+  msg1->SerializeToString(&str1);
+  msg2->SerializeToString(&str2);
+  ASSERT(str1 == str2);
+  ASSERT(std::string(str, len) == str2);
 }

-int main(int argc, char *argv[])
-{
-  if (argc < 3) {
-    fprintf(stderr, "Usage: test_vs_proto2 <descriptor file> <message file>\n");
-    return 1;
+// Cross-checks upb's zig-zag encoders against proto2's WireFormatLite over a
+// sequence of values that starts at 5 and grows by a factor of 1.5 per step.
+// The 32-bit encoder is only compared while the value is below UINT32_MAX.
+// NOTE(review): "num * 1.5 > num" is evaluated in floating point, so the
+// loop presumably ends only once "num *= 1.5" no longer fits in uint64_t;
+// confirm that this termination is well-defined on the target compilers.
+void test_zig_zag() {
+  for (uint64_t num = 5; num * 1.5 > num; num *= 1.5) {
+    ASSERT(upb_zzenc_64(num) ==
+           google::protobuf::internal::WireFormatLite::ZigZagEncode64(num));
+    if (num < UINT32_MAX) {
+      ASSERT(upb_zzenc_32(num) ==
+             google::protobuf::internal::WireFormatLite::ZigZagEncode32(num));
+    }
  }
-  const char *descriptor_file = argv[1];
-  const char *message_file = argv[2];

-  // Initialize upb state, parse descriptor.
-  upb_status status = UPB_STATUS_INIT;
-  upb_symtab *symtab = upb_symtab_new();
-  size_t fds_len;
-  const char *fds = upb_readfile(descriptor_file, &fds_len);
-  if(fds == NULL) {
-    fprintf(stderr, "Couldn't read %s.\n", descriptor_file);
-    return 1;
-  }
-  upb_load_descriptor_into_symtab(symtab, fds, fds_len, &status);
-  if(!upb_ok(&status)) {
-    fprintf(stderr, "Error importing %s: %s", descriptor_file,
-            upb_status_getstr(&status));
-    return 1;
-  }
-  free((void*)fds);
+}

-  const upb_def *def = upb_symtab_lookup(symtab, MESSAGE_NAME);
-  const upb_msgdef *msgdef;
-  if(!def || !(msgdef = upb_dyncast_msgdef_const(def))) {
-    fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME);
+int main(int argc, char *argv[])
+{
+  if (argc < 2) {
+    fprintf(stderr, "Usage: test_vs_proto2 <message file>\n");
    return 1;
  }
+  const char *message_file = argv[1];

  // Read the message data itself.
  size_t len;
@ -242,32 +99,25 @@ int main(int argc, char *argv[])
    return 1;
  }

+  MESSAGE_CIDENT msg1;
+  MESSAGE_CIDENT msg2;
+
+  const upb::MessageDef* m = upb::proto2_bridge::NewFinalMessageDef(msg1, &m);
+
+  compare_metadata(msg1.GetDescriptor(), m);
+
  // Run twice to test proper object reuse.
-  MESSAGE_CIDENT proto2_msg;
-  void *upb_msg = upb_stdmsg_new(msgdef);
-  parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, true);
-  parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, false);
-  parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, true);
-  parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, false);
+  parse_and_compare(&msg1, &msg2, m, str, len, true);
+  parse_and_compare(&msg1, &msg2, m, str, len, false);
+  parse_and_compare(&msg1, &msg2, m, str, len, true);
+  parse_and_compare(&msg1, &msg2, m, str, len, false);
  printf("All tests passed, %d assertions.\n", num_assertions);

-  upb_stdmsg_free(upb_msg, msgdef);
-  upb_def_unref(UPB_UPCAST(msgdef));
+  m->Unref(&m);
  free((void*)str);
-  upb_symtab_unref(symtab);
-  upb_status_uninit(&status);

-  // Test Zig-Zag encoding/decoding.
-  for (uint64_t num = 5; num * 1.5 > num; num *= 1.5) {
-    ASSERT(upb_zzenc_64(num) ==
-           google::protobuf::internal::WireFormatLite::ZigZagEncode64(num));
-    if (num < UINT32_MAX) {
-      ASSERT(upb_zzenc_32(num) ==
-             google::protobuf::internal::WireFormatLite::ZigZagEncode32(num));
-    }
-  }
+  test_zig_zag();

  google::protobuf::ShutdownProtobufLibrary();
-
  return 0;
 }
--- a/tests/tests.c
+++ b/tests/tests.c
@ -1,121 +0,0 @@
-
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include "upb/def.h"
-#include "upb/handlers.h"
-#include "upb/pb/decoder.h"
-#include "upb/pb/glue.h"
-#include "upb_test.h"
-
-const char *descriptor_file;
-
-static upb_symtab *load_test_proto() {
-  upb_symtab *s = upb_symtab_new();
-  ASSERT(s);
-  upb_status status = UPB_STATUS_INIT;
-  if (!upb_load_descriptor_file_into_symtab(s, descriptor_file, &status)) {
-    fprintf(stderr, "Error loading descriptor file: %s\n",
-            upb_status_getstr(&status));
-    exit(1);
-  }
-  upb_status_uninit(&status);
-  return s;
-}
-
-static upb_flow_t upb_test_onvalue(void *c, upb_value fval, upb_value val) {
-  (void)c;
-  (void)fval;
-  (void)val;
-  return UPB_CONTINUE;
-}
-
-static void test_upb_jit() {
-  upb_symtab *s = load_test_proto();
-  const upb_def *def = upb_symtab_lookup(s, "SimplePrimitives");
-  ASSERT(def);
-
-  upb_handlers *h = upb_handlers_new();
-  upb_handlerset hset = {NULL, NULL, &upb_test_onvalue, NULL, NULL, NULL, NULL};
-  upb_handlers_reghandlerset(h, upb_downcast_msgdef_const(def), &hset);
-  upb_decoderplan *p = upb_decoderplan_new(h, true);
-#ifdef UPB_USE_JIT_X64
-  ASSERT(upb_decoderplan_hasjitcode(p));
-#else
-  ASSERT(!upb_decoderplan_hasjitcode(p));
-#endif
-  upb_decoderplan_unref(p);
-  upb_symtab_unref(s);
-  upb_def_unref(def);
-  upb_handlers_unref(h);
-}
-
-static void test_upb_symtab() {
-  upb_symtab *s = load_test_proto();
-
-  // Test cycle detection by making a cyclic def's main refcount go to zero
-  // and then be incremented to one again.
-  const upb_def *def = upb_symtab_lookup(s, "A");
-  ASSERT(def);
-  upb_symtab_unref(s);
-  const upb_msgdef *m = upb_downcast_msgdef_const(def);
-  upb_msg_iter i = upb_msg_begin(m);
-  ASSERT(!upb_msg_done(i));
-  upb_fielddef *f = upb_msg_iter_field(i);
-  ASSERT(upb_hassubdef(f));
-  upb_def *def2 = f->def;
-
-  i = upb_msg_next(m, i);
-  ASSERT(upb_msg_done(i));  // "A" should only have one field.
-
-  ASSERT(upb_downcast_msgdef(def2));
-  upb_def_ref(def2);
-  upb_def_unref(def);
-  upb_def_unref(def2);
-}
-
-static void test_upb_two_fielddefs() {
-  upb_fielddef *f1 = upb_fielddef_new();
-  upb_fielddef *f2 = upb_fielddef_new();
-
-  ASSERT(upb_fielddef_ismutable(f1));
-  upb_fielddef_setname(f1, "");
-  upb_fielddef_setnumber(f1, 1937);
-  upb_fielddef_settype(f1, UPB_TYPE(FIXED64));
-  upb_fielddef_setlabel(f1, UPB_LABEL(REPEATED));
-  upb_fielddef_settypename(f1, "");
-  ASSERT(upb_fielddef_number(f1) == 1937);
-
-  ASSERT(upb_fielddef_ismutable(f2));
-  upb_fielddef_setname(f2, "");
-  upb_fielddef_setnumber(f2, 1572);
-  upb_fielddef_settype(f2, UPB_TYPE(BYTES));
-  upb_fielddef_setlabel(f2, UPB_LABEL(REPEATED));
-  upb_fielddef_settypename(f2, "");
-  ASSERT(upb_fielddef_number(f2) == 1572);
-
-  upb_fielddef_unref(f1);
-  upb_fielddef_unref(f2);
-}
-
-int main(int argc, char *argv[])
-{
-  if (argc < 2) {
-    fprintf(stderr, "Usage: test_cpp <descriptor file>\n");
-    return 1;
-  }
-  descriptor_file = argv[1];
-#define TEST(func) do { \
-  int assertions_before = num_assertions; \
-  printf("Running " #func "..."); fflush(stdout); \
-  func(); \
-  printf("ok (%d assertions).\n", num_assertions - assertions_before); \
-  } while (0)
-
-  TEST(test_upb_symtab);
-  TEST(test_upb_jit);
-  TEST(test_upb_two_fielddefs);
-  printf("All tests passed (%d assertions).\n", num_assertions);
-  return 0;
-}
--- a/tests/upb_test.h
+++ b/tests/upb_test.h
@ -7,6 +7,7 @@
 #ifndef UPB_TEST_H_
 #define UPB_TEST_H_

+#include <stdio.h>
 #include <stdlib.h>

 #ifdef __cplusplus
@ -18,9 +19,28 @@ int num_assertions = 0;
  ++num_assertions; \
  if (!(expr)) { \
    fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \
+    fprintf(stderr, "expr: %s\n", #expr); \
    abort(); \
  } \
-} while(0)
+} while (0)
+
+// Like ASSERT(), but does not increment num_assertions.  If "expr" is false,
+// prints the file/line and the text of the failed expression to stderr and
+// then calls abort().
+#define ASSERT_NOCOUNT(expr) do { \
+  if (!(expr)) { \
+    fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \
+    fprintf(stderr, "expr: %s\n", #expr); \
+    abort(); \
+  } \
+} while (0)
+
+// Counted assertion (increments num_assertions) that, on failure, also prints
+// the string returned by upb_status_getstr(status) before calling abort().
+// NOTE(review): upb_status_getstr() is not declared by the includes visible in
+// this header; callers presumably need upb's status declarations in scope --
+// confirm.
+#define ASSERT_STATUS(expr, status) do { \
+  ++num_assertions; \
+  if (!(expr)) { \
+    fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \
+    fprintf(stderr, "expr: %s\n", #expr); \
+    fprintf(stderr, "failed status: %s\n", upb_status_getstr(status)); \
+    abort(); \
+  } \
+} while (0)

 #ifdef __cplusplus
 }  /* extern "C" */
--- a/tools/upbc.c
+++ b/tools/upbc.c
@ -55,7 +55,7 @@ static void write_const_h(const upb_def *defs[], int num_entries,
  for(int i = 0; i < num_entries; i++) {  /* Foreach enum */
    if(defs[i]->type != UPB_DEF_ENUM) continue;
    const upb_enumdef *enumdef = upb_downcast_enumdef_const(defs[i]);
-    char *enum_name = strdup(upb_def_fqname(UPB_UPCAST(enumdef)));
+    char *enum_name = strdup(upb_def_fullname(UPB_UPCAST(enumdef)));
    char *enum_val_prefix = strdup(enum_name);
    to_cident(enum_name);
    to_preproc(enum_val_prefix);
@ -63,11 +63,12 @@ static void write_const_h(const upb_def *defs[], int num_entries,
    fprintf(stream, "typedef enum %s {\n", enum_name);
    bool first = true;
    /* Foreach enum value. */
-    for (upb_enum_iter iter = upb_enum_begin(enumdef);
-         !upb_enum_done(iter);
-         iter = upb_enum_next(enumdef, iter)) {
-      char *value_name = strdup(upb_enum_iter_name(iter));
-      uint32_t value = upb_enum_iter_number(iter);
+    upb_enum_iter iter;
+    for (upb_enum_begin(&iter, enumdef);
+         !upb_enum_done(&iter);
+         upb_enum_next(&iter)) {
+      char *value_name = strdup(upb_enum_iter_name(&iter));
+      uint32_t value = upb_enum_iter_number(&iter);
      to_preproc(value_name);
      /* "  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13," */
      if (!first) fputs(",\n", stream);
@ -85,20 +86,20 @@ static void write_const_h(const upb_def *defs[], int num_entries,
  for(int i = 0; i < num_entries; i++) {  /* Foreach enum */
    const upb_msgdef *m = upb_dyncast_msgdef_const(defs[i]);
    if(!m) continue;
-    char *msg_name = strdup(upb_def_fqname(UPB_UPCAST(m)));
+    char *msg_name = strdup(upb_def_fullname(UPB_UPCAST(m)));
    char *msg_val_prefix = strdup(msg_name);
    to_preproc(msg_val_prefix);
    upb_msg_iter i;
-    for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
-      upb_fielddef *f = upb_msg_iter_field(i);
-      char *preproc_field_name = strdup(f->name);
+    for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
+      upb_fielddef *f = upb_msg_iter_field(&i);
+      char *preproc_field_name = strdup(upb_fielddef_name(f));
      to_preproc(preproc_field_name);
      fprintf(stream, "#define %s_%s__FIELDNUM %d\n",
              msg_val_prefix, preproc_field_name, upb_fielddef_number(f));
      fprintf(stream, "#define %s_%s__FIELDNAME \"%s\"\n",
-              msg_val_prefix, preproc_field_name, f->name);
+              msg_val_prefix, preproc_field_name, upb_fielddef_name(f));
      fprintf(stream, "#define %s_%s__FIELDTYPE %d\n\n",
-              msg_val_prefix, preproc_field_name, f->type);
+              msg_val_prefix, preproc_field_name, upb_fielddef_type(f));
      free(preproc_field_name);
    }
    free(msg_val_prefix);
@ -123,13 +124,13 @@ const char usage[] =
  "                     of using the input file as a basename.\n"
 ;

-void usage_err(char *err) {
+void usage_err(const char *err) {
  fprintf(stderr, "upbc: %s\n\n", err);
  fputs(usage, stderr);
  exit(1);
 }

-void error(char *err, ...) {
+void error(const char *err, ...) {
  va_list args;
  va_start(args, err);
  fprintf(stderr, "upbc: ");
@ -175,8 +176,8 @@ int main(int argc, char *argv[]) {
  upb_status_uninit(&status);

  /* Emit output files. */
-  const int maxsize = 256;
-  char h_const_filename[maxsize];
+  char h_const_filename[256];
+  const int maxsize = sizeof(h_const_filename);
  if(snprintf(h_const_filename, maxsize, "%s_const.h", outfile_base) >= maxsize)
    error("File base too long.\n");

@ -184,9 +185,9 @@ int main(int argc, char *argv[]) {
  if(!h_const_file) error("Failed to open _const.h output file\n");

  int symcount;
-  const upb_def **defs = upb_symtab_getdefs(s, &symcount, UPB_DEF_ANY);
+  const upb_def **defs = upb_symtab_getdefs(s, &symcount, UPB_DEF_ANY, &defs);
  write_const_h(defs, symcount, h_const_filename, h_const_file);
-  for (int i = 0; i < symcount; i++) upb_def_unref(defs[i]);
+  for (int i = 0; i < symcount; i++) upb_def_unref(defs[i], &defs);
  free(defs);
  free(descriptor);
  upb_symtab_unref(s);
--- a/upb/atomic.h
+++ b/upb/atomic.h
@ -1,181 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Google Inc.  See LICENSE for details.
- * Author: Josh Haberman <jhaberman@gmail.com>
- *
- * Only a very small part of upb is thread-safe.  Notably, individual
- * messages, arrays, and strings are *not* thread safe for mutating.
- * However, we do make message *metadata* such as upb_msgdef and
- * upb_symtab thread-safe, and their ownership is tracked via atomic
- * refcounting.  This header implements the small number of atomic
- * primitives required to support this.  The primitives we implement
- * are:
- *
- * - a reader/writer lock (wrappers around platform-provided mutexes).
- * - an atomic refcount.
- *
- * TODO: This needs some revisiting/refinement, see:
- *       http://code.google.com/p/upb/issues/detail?id=8
- */
-
-#ifndef UPB_ATOMIC_H_
-#define UPB_ATOMIC_H_
-
-#include <stdbool.h>
-#include <assert.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* inline if possible, emit standalone code if required. */
-#ifndef INLINE
-#define INLINE static inline
-#endif
-
-// Until this stuff is actually working, make thread-unsafe the default.
-#define UPB_THREAD_UNSAFE
-
-#ifdef UPB_THREAD_UNSAFE
-
-/* Non-thread-safe implementations. ******************************************/
-
-typedef struct {
-  int v;
-} upb_atomic_t;
-
-#define UPB_ATOMIC_INIT(x) {x}
-
-INLINE void upb_atomic_init(upb_atomic_t *a, int val) { a->v = val; }
-INLINE bool upb_atomic_ref(upb_atomic_t *a) { return a->v++ == 0; }
-INLINE bool upb_atomic_unref(upb_atomic_t *a) { assert(a->v > 0); return --a->v == 0; }
-INLINE int upb_atomic_read(upb_atomic_t *a) { return a->v; }
-INLINE bool upb_atomic_add(upb_atomic_t *a, int val) {
-  a->v += val;
-  return a->v == 0;
-}
-
-#endif
-
-/* Atomic refcount ************************************************************/
-
-#ifdef UPB_THREAD_UNSAFE
-
-/* Already defined above. */
-
-#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4
-
-/* GCC includes atomic primitives. */
-
-typedef struct {
-  volatile int v;
-} upb_atomic_t;
-
-INLINE void upb_atomic_init(upb_atomic_t *a, int val) {
-  a->v = val;
-  __sync_synchronize();   /* Ensure the initialized value is visible. */
-}
-
-INLINE bool upb_atomic_ref(upb_atomic_t *a) {
-  return __sync_fetch_and_add(&a->v, 1) == 0;
-}
-
-INLINE bool upb_atomic_add(upb_atomic_t *a, int n) {
-  return __sync_add_and_fetch(&a->v, n) == 0;
-}
-
-INLINE bool upb_atomic_unref(upb_atomic_t *a) {
-  return __sync_sub_and_fetch(&a->v, 1) == 0;
-}
-
-INLINE bool upb_atomic_read(upb_atomic_t *a) {
-  return __sync_fetch_and_add(&a->v, 0);
-}
-
-#elif defined(WIN32)
-
-/* Windows defines atomic increment/decrement. */
-#include <Windows.h>
-
-typedef struct {
-  volatile LONG val;
-} upb_atomic_t;
-
-INLINE void upb_atomic_init(upb_atomic_t *a, int val) {
-  InterlockedExchange(&a->val, val);
-}
-
-INLINE bool upb_atomic_ref(upb_atomic_t *a) {
-  return InterlockedIncrement(&a->val) == 1;
-}
-
-INLINE bool upb_atomic_unref(upb_atomic_t *a) {
-  return InterlockedDecrement(&a->val) == 0;
-}
-
-#else
-#error Atomic primitives not defined for your platform/CPU.  \
-       Implement them or compile with UPB_THREAD_UNSAFE.
-#endif
-
-INLINE bool upb_atomic_only(upb_atomic_t *a) {
-  return upb_atomic_read(a) == 1;
-}
-
-/* Reader/Writer lock. ********************************************************/
-
-#ifdef UPB_THREAD_UNSAFE
-
-typedef struct {
-} upb_rwlock_t;
-
-INLINE void upb_rwlock_init(const upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_destroy(const upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_rdlock(const upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_wrlock(const upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_unlock(const upb_rwlock_t *l) { (void)l; }
-
-#elif defined(UPB_USE_PTHREADS)
-
-#include <pthread.h>
-
-typedef struct {
-  pthread_rwlock_t lock;
-} upb_rwlock_t;
-
-INLINE void upb_rwlock_init(const upb_rwlock_t *l) {
-  /* TODO: check return value. */
-  pthread_rwlock_init(&l->lock, NULL);
-}
-
-INLINE void upb_rwlock_destroy(const upb_rwlock_t *l) {
-  /* TODO: check return value. */
-  pthread_rwlock_destroy(&l->lock);
-}
-
-INLINE void upb_rwlock_rdlock(const upb_rwlock_t *l) {
-  /* TODO: check return value. */
-  pthread_rwlock_rdlock(&l->lock);
-}
-
-INLINE void upb_rwlock_wrlock(const upb_rwlock_t *l) {
-  /* TODO: check return value. */
-  pthread_rwlock_wrlock(&l->lock);
-}
-
-INLINE void upb_rwlock_unlock(const upb_rwlock_t *l) {
-  /* TODO: check return value. */
-  pthread_rwlock_unlock(&l->lock);
-}
-
-#else
-#error Reader/writer lock is not defined for your platform/CPU.  \
-       Implement it or compile with UPB_THREAD_UNSAFE.
-#endif
-
-#ifdef __cplusplus
-}  /* extern "C" */
-#endif
-
-#endif  /* UPB_ATOMIC_H_ */
--- a/upb/bytestream.c
+++ b/upb/bytestream.c
@ -32,8 +32,6 @@ upb_byteregion *upb_byteregion_newl(const void *str, size_t len) {
  memcpy(ptr, str, len);
  ptr[len] = '\0';
  upb_stringsrc_reset(src, ptr, len);
-  upb_byteregion_fetch(upb_stringsrc_allbytes(src));
-  assert(len == upb_byteregion_available(upb_stringsrc_allbytes(src), 0));
  return upb_stringsrc_allbytes(src);
 }

@ -93,10 +91,10 @@ static upb_stdio_buf *upb_stdio_findbuf(const upb_stdio *s, uint64_t ofs) {

 static upb_stdio_buf *upb_stdio_rotatebufs(upb_stdio *s) {
  upb_stdio_buf **reuse = NULL;  // XXX
-  uint32_t num_reused = 0, num_inuse = 0;
+  int num_reused = 0, num_inuse = 0;

  // Could sweep only a subset of bufs if this was a hotspot.
-  for (uint32_t i = 0; i < s->nbuf; i++) {
+  for (int i = 0; i < s->nbuf; i++) {
    upb_stdio_buf *buf = s->bufs[i];
    if (buf->refcount > 0) {
      s->bufs[num_inuse++] = buf;
@ -243,10 +241,9 @@ upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; }

 upb_bytesuccess_t upb_stringsrc_fetch(void *_src, uint64_t ofs, size_t *read) {
  upb_stringsrc *src = _src;
-  assert(ofs <= src->len);
+  assert(ofs < src->len);
  if (ofs == src->len) {
    upb_status_seteof(&src->bytesrc.status);
-    *read = 0;
    return UPB_BYTE_EOF;
  }
  *read = src->len - ofs;
--- a/upb/bytestream.h
+++ b/upb/bytestream.h
@ -372,8 +372,7 @@ INLINE int upb_bytesink_putc(upb_bytesink *sink, char ch) {
 }

 INLINE int upb_bytesink_putrepeated(upb_bytesink *sink, char ch, int len) {
-  int i;
-  for (i = 0; i < len; i++)
+  for (int i = 0; i < len; i++)
    if (upb_bytesink_write(sink, &ch, 1) < 0)
      return -1;
  return len;
@ -436,7 +435,8 @@ typedef struct {
  FILE *file;
  bool should_close;
  upb_stdio_buf **bufs;
-  uint32_t nbuf, szbuf;
+  int nbuf;
+  uint32_t szbuf;
 } upb_stdio;

 void upb_stdio_init(upb_stdio *stdio);
--- a/upb/def.c
+++ b/upb/def.c
--- a/upb/def.h
+++ b/upb/def.h
@ -1,17 +1,17 @@
 /*
 * upb - a minimalist implementation of protocol buffers.
 *
- * Copyright (c) 2009-2011 Google Inc.  See LICENSE for details.
+ * Copyright (c) 2009-2012 Google Inc.  See LICENSE for details.
 * Author: Josh Haberman <jhaberman@gmail.com>
 *
- * Provides a mechanism for creating and linking proto definitions.
- * These form the protobuf schema, and are used extensively throughout upb:
+ * Defs are upb's internal representation of the constructs that can appear
+ * in a .proto file:
+ *
 * - upb_msgdef: describes a "message" construct.
 * - upb_fielddef: describes a message field.
 * - upb_enumdef: describes an enum.
 * (TODO: definitions of services).
 *
- *
 * Defs go through two distinct phases of life:
 *
 * 1. MUTABLE: when first created, the properties of the def can be set freely
@ -20,16 +20,15 @@
 *    not be used for any purpose except to set its properties (it can't be
 *    used to parse anything, create any messages in memory, etc).
 *
- * 2. FINALIZED: after being added to a symtab (which links the defs together)
- *    the defs become finalized (thread-safe and immutable).  Programs may only
- *    access defs through a CONST POINTER during this stage -- upb_symtab will
- *    help you out with this requirement by only vending const pointers, but
- *    you need to make sure not to use any non-const pointers you still have
- *    sitting around.  In practice this means that you may not call any setters
- *    on the defs (or functions that themselves call the setters).  If you want
- *    to modify an existing immutable def, copy it with upb_*_dup(), modify the
- *    copy, and add the modified def to the symtab (replacing the existing
- *    def).
+ * 2. FINALIZED: the upb_finalize() operation finalizes a set of defs,
+ *    which makes them thread-safe and immutable.  Finalized defs may only be
+ *    accessed through a CONST POINTER.  If you want to modify an existing
+ *    immutable def, copy it with upb_*_dup() and modify and finalize the copy.
+ *
+ * The refcounting of defs works properly no matter what state the def is in.
+ * Once the def is finalized it is guaranteed that any def reachable from a
+ * live def is also live (so a ref on the base of a message tree keeps the
+ * whole tree alive).
 *
 * You can test for which stage of life a def is in by calling
 * upb_def_ismutable().  This is particularly useful for dynamic language
@ -46,181 +45,306 @@
 #ifndef UPB_DEF_H_
 #define UPB_DEF_H_

-#include "upb/atomic.h"
+#include "upb/refcount.h"
 #include "upb/table.h"

 #ifdef __cplusplus
 extern "C" {
 #endif

-struct _upb_symtab;
-typedef struct _upb_symtab upb_symtab;
+/* upb_def: base class for defs  **********************************************/

 // All the different kind of defs we support.  These correspond 1:1 with
 // declarations in a .proto file.
 typedef enum {
-  UPB_DEF_MSG = 1,
+  UPB_DEF_MSG,
+  UPB_DEF_FIELD,
  UPB_DEF_ENUM,
  UPB_DEF_SERVICE,          // Not yet implemented.

  UPB_DEF_ANY = -1,         // Wildcard for upb_symtab_get*()
-  UPB_DEF_UNRESOLVED = 99,  // Internal-only.
 } upb_deftype_t;

-
-/* upb_def: base class for defs  **********************************************/
-
-typedef struct {
-  char *fqname;     // Fully qualified.
-  upb_symtab *symtab;     // Def is mutable iff symtab == NULL.
-  upb_atomic_t refcount;  // Owns a ref on symtab iff (symtab && refcount > 0).
+typedef struct _upb_def {
+  upb_refcount refcount;
+  char *fullname;
  upb_deftype_t type;
+  bool is_finalized;
 } upb_def;

+#define UPB_UPCAST(ptr) (&(ptr)->base)
+
 // Call to ref/unref a def.  Can be used at any time, but is not thread-safe
-// until the def is in a symtab.  While a def is in a symtab, everything
-// reachable from that def (the symtab and all defs in the symtab) are
-// guaranteed to be alive.
-void upb_def_ref(const upb_def *def);
-void upb_def_unref(const upb_def *def);
-upb_def *upb_def_dup(const upb_def *def);
-
-// A def is mutable until it has been added to a symtab.
+// until the def is finalized.  While a def is finalized, everything reachable
+// from that def is guaranteed to be alive.
+void upb_def_ref(const upb_def *def, void *owner);
+void upb_def_unref(const upb_def *def, void *owner);
+void upb_def_donateref(const upb_def *def, void *from, void *to);
+upb_def *upb_def_dup(const upb_def *def, void *owner);
+
+// A def is mutable until it has been finalized.
 bool upb_def_ismutable(const upb_def *def);
-INLINE const char *upb_def_fqname(const upb_def *def) { return def->fqname; }
-bool upb_def_setfqname(upb_def *def, const char *fqname);  // Only if mutable.
+bool upb_def_isfinalized(const upb_def *def);

-#define UPB_UPCAST(ptr) (&(ptr)->base)
+// "fullname" is the def's fully-qualified name (eg. foo.bar.Message).
+INLINE const char *upb_def_fullname(const upb_def *d) { return d->fullname; }
+
+// The def must be mutable.  Caller retains ownership of fullname.  Defs are
+// not required to have a name; if a def has no name when it is finalized, it
+// will remain an anonymous def.
+bool upb_def_setfullname(upb_def *def, const char *fullname);
+
+// Finalizes the given defs; this validates all constraints and marks the defs
+// as finalized (read-only).  This will also cause fielddefs to take refs on
+// their subdefs so that any reachable def will be kept alive (but this is
+// done in a way that correctly handles circular references).
+//
+// On success, a new list is returned containing the finalized defs and
+// ownership of the "defs" list passes to the function.  On failure NULL is
+// returned and the caller retains ownership of "defs."
+//
+// Symbolic references to sub-types or enum defaults must have already been
+// resolved.  "defs" must contain the transitive closure of any mutable defs
+// reachable from any def in the list.  In other words, there may not be a
+// mutable def which is reachable from one of "defs" that does not appear
+// elsewhere in "defs."  "defs" may not contain fielddefs, but any fielddefs
+// reachable from the given msgdefs will be finalized.
+//
+// n is currently limited to 64k defs; if more are required, break them into
+// batches of 64k (or we could raise this limit, at the cost of a bigger
+// upb_def structure or complexity in upb_finalize()).
+bool upb_finalize(upb_def *const*defs, int n, upb_status *status);


 /* upb_fielddef ***************************************************************/

-// A upb_fielddef describes a single field in a message.  It isn't a full def
-// in the sense that it derives from upb_def.  It cannot stand on its own; it
-// must be part of a upb_msgdef.  It is also reference-counted.
+// We choose these to match descriptor.proto.  Clients may use UPB_TYPE() and
+// UPB_LABEL() instead of referencing these directly.
+typedef enum {
+  UPB_TYPE_NONE     = -1,  // Internal-only, may be removed.
+  UPB_TYPE_ENDGROUP = 0,   // Internal-only, may be removed.
+  UPB_TYPE_DOUBLE   = 1,
+  UPB_TYPE_FLOAT    = 2,
+  UPB_TYPE_INT64    = 3,
+  UPB_TYPE_UINT64   = 4,
+  UPB_TYPE_INT32    = 5,
+  UPB_TYPE_FIXED64  = 6,
+  UPB_TYPE_FIXED32  = 7,
+  UPB_TYPE_BOOL     = 8,
+  UPB_TYPE_STRING   = 9,
+  UPB_TYPE_GROUP    = 10,
+  UPB_TYPE_MESSAGE  = 11,
+  UPB_TYPE_BYTES    = 12,
+  UPB_TYPE_UINT32   = 13,
+  UPB_TYPE_ENUM     = 14,
+  UPB_TYPE_SFIXED32 = 15,
+  UPB_TYPE_SFIXED64 = 16,
+  UPB_TYPE_SINT32   = 17,
+  UPB_TYPE_SINT64   = 18,
+} upb_fieldtype_t;
+
+#define UPB_NUM_TYPES 19
+
+typedef enum {
+  UPB_LABEL_OPTIONAL = 1,
+  UPB_LABEL_REQUIRED = 2,
+  UPB_LABEL_REPEATED = 3,
+} upb_label_t;
+
+// These macros are provided for legacy reasons.
+#define UPB_TYPE(type) UPB_TYPE_ ## type
+#define UPB_LABEL(type) UPB_LABEL_ ## type
+
+// Info for a given field type.
+typedef struct {
+  uint8_t align;
+  uint8_t size;
+  uint8_t inmemory_type;    // For example, INT32, SINT32, and SFIXED32 -> INT32
+} upb_typeinfo;
+
+extern const upb_typeinfo upb_types[UPB_NUM_TYPES];
+
+// A upb_fielddef describes a single field in a message.  It is most often
+// found as a part of a upb_msgdef, but can also stand alone to represent
+// an extension.
 typedef struct _upb_fielddef {
+  upb_def base;
  struct _upb_msgdef *msgdef;
-  upb_def *def;  // if upb_hasdef(f)
-  upb_atomic_t refcount;
-  bool finalized;
-
-  // The following fields may be modified until the def is finalized.
-  uint8_t type;          // Use UPB_TYPE() constants.
-  uint8_t label;         // Use UPB_LABEL() constants.
+  union {
+    char *name;    // If subdef_is_symbolic.
+    upb_def *def;  // If !subdef_is_symbolic.
+  } sub;  // The msgdef or enumdef for this field, if upb_hassubdef(f).
+  bool subdef_is_symbolic;
+  bool default_is_string;
+  bool subdef_is_owned;
+  upb_fieldtype_t type;
+  upb_label_t label;
  int16_t hasbit;
  uint16_t offset;
-  bool default_is_string;
-  bool active;
  int32_t number;
-  char *name;
-  upb_value defaultval;  // Only meaningful for non-repeated scalars and strings.
+  upb_value defaultval;  // Only for non-repeated scalars and strings.
  upb_value fval;
  struct _upb_accessor_vtbl *accessor;
-  const void *default_ptr;
  const void *prototype;
 } upb_fielddef;

-upb_fielddef *upb_fielddef_new(void);
-void upb_fielddef_ref(upb_fielddef *f);
-void upb_fielddef_unref(upb_fielddef *f);
-upb_fielddef *upb_fielddef_dup(upb_fielddef *f);
+// Returns NULL if memory allocation failed.
+upb_fielddef *upb_fielddef_new(void *owner);
+
+INLINE void upb_fielddef_ref(upb_fielddef *f, void *owner) {
+  upb_def_ref(UPB_UPCAST(f), owner);
+}
+INLINE void upb_fielddef_unref(upb_fielddef *f, void *owner) {
+  upb_def_unref(UPB_UPCAST(f), owner);
+}
+
+// Duplicates the given field, returning NULL if memory allocation failed.
+// When a fielddef is duplicated, the subdef (if any) is made symbolic if it
+// wasn't already.  If the subdef is set but has no name (which is possible
+// since msgdefs are not required to have a name) the new fielddef's subdef
+// will be unset.
+upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, void *owner);
+
+INLINE bool upb_fielddef_ismutable(const upb_fielddef *f) {
+  return upb_def_ismutable(UPB_UPCAST(f));
+}
+INLINE bool upb_fielddef_isfinalized(const upb_fielddef *f) {
+  return !upb_fielddef_ismutable(f);
+}

-// A fielddef is mutable until its msgdef has been added to a symtab.
-bool upb_fielddef_ismutable(const upb_fielddef *f);
+// Simple accessors. ///////////////////////////////////////////////////////////

-// Read accessors.  May be called any time.
-INLINE uint8_t upb_fielddef_type(const upb_fielddef *f) { return f->type; }
-INLINE uint8_t upb_fielddef_label(const upb_fielddef *f) { return f->label; }
+INLINE upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
+  return f->type;
+}
+INLINE upb_label_t upb_fielddef_label(const upb_fielddef *f) {
+  return f->label;
+}
 INLINE int32_t upb_fielddef_number(const upb_fielddef *f) { return f->number; }
-INLINE char *upb_fielddef_name(const upb_fielddef *f) { return f->name; }
+INLINE uint16_t upb_fielddef_offset(const upb_fielddef *f) { return f->offset; }
+INLINE int16_t upb_fielddef_hasbit(const upb_fielddef *f) { return f->hasbit; }
+INLINE const char *upb_fielddef_name(const upb_fielddef *f) {
+  return upb_def_fullname(UPB_UPCAST(f));
+}
 INLINE upb_value upb_fielddef_fval(const upb_fielddef *f) { return f->fval; }
-INLINE bool upb_fielddef_finalized(const upb_fielddef *f) { return f->finalized; }
 INLINE struct _upb_msgdef *upb_fielddef_msgdef(const upb_fielddef *f) {
  return f->msgdef;
 }
 INLINE struct _upb_accessor_vtbl *upb_fielddef_accessor(const upb_fielddef *f) {
  return f->accessor;
 }
-INLINE const char *upb_fielddef_typename(const upb_fielddef *f) {
-  return f->def ? f->def->fqname : NULL;
-}

-// Returns the default value for this fielddef, which may either be something
-// the client set explicitly or the "default default" (0 for numbers, empty for
-// strings).  The field's type indicates the type of the returned value, except
-// for enums.   For enums the default can be set either numerically or
-// symbolically -- the upb_fielddef_default_is_symbolic() function below will
-// indicate which it is.  For string defaults, the value will be a upb_strref
-// which is invalidated by any other call on this object.
-INLINE upb_value upb_fielddef_default(const upb_fielddef *f) {
-  return f->defaultval;
-}
+bool upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type);
+bool upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label);
+void upb_fielddef_sethasbit(upb_fielddef *f, int16_t hasbit);
+void upb_fielddef_setoffset(upb_fielddef *f, uint16_t offset);
+// TODO(haberman): need a way of keeping the fval alive even if some handlers
+// outlast the fielddef.
+void upb_fielddef_setfval(upb_fielddef *f, upb_value fval);
+void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl);

-// The results of this function are only meaningful for enum fields, which can
-// have a default specified either as an integer or as a string.  If this
-// returns true, the default returned from upb_fielddef_default() is a string,
-// otherwise it is an integer.
-INLINE bool upb_fielddef_default_is_symbolic(const upb_fielddef *f) {
-  return f->default_is_string;
+// "Number" and "fullname" must be set before the fielddef is added to a msgdef.
+// For the moment we do not allow these to be set once the fielddef is added to
+// a msgdef -- this could be relaxed in the future.
+bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number);
+INLINE bool upb_fielddef_setname(upb_fielddef *f, const char *name) {
+  return upb_def_setfullname(UPB_UPCAST(f), name);
 }

-// The enum or submessage def for this field, if any.  Only meaningful for
-// submessage, group, and enum fields (ie. when upb_hassubdef(f) is true).
-// Since defs are not linked together until they are in a symtab, this
-// will return NULL until the msgdef is in a symtab.
-upb_def *upb_fielddef_subdef(const upb_fielddef *f);
+// Field type tests. ///////////////////////////////////////////////////////////

-// Write accessors.  "Number" and "name" must be set before the fielddef is
-// added to a msgdef.  For the moment we do not allow these to be set once
-// the fielddef is added to a msgdef -- this could be relaxed in the future.
-bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number);
-bool upb_fielddef_setname(upb_fielddef *f, const char *name);
+INLINE bool upb_issubmsgtype(upb_fieldtype_t type) {
+  return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE);
+}
+INLINE bool upb_isstringtype(upb_fieldtype_t type) {
+  return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES);
+}
+INLINE bool upb_isprimitivetype(upb_fieldtype_t type) {
+  return !upb_issubmsgtype(type) && !upb_isstringtype(type);
+}
+INLINE bool upb_issubmsg(const upb_fielddef *f) {  // f->type: group/message.
+  return upb_issubmsgtype(f->type);
+}
+INLINE bool upb_isstring(const upb_fielddef *f) {  // f->type: string/bytes.
+  return upb_isstringtype(f->type);
+}
+INLINE bool upb_isseq(const upb_fielddef *f) {  // True iff label is REPEATED.
+  return f->label == UPB_LABEL(REPEATED);
+}

-// These writers may be called at any time prior to being put in a symtab.
-bool upb_fielddef_settype(upb_fielddef *f, uint8_t type);
-bool upb_fielddef_setlabel(upb_fielddef *f, uint8_t label);
-void upb_fielddef_setfval(upb_fielddef *f, upb_value fval);
-void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl);
+// Default value. //////////////////////////////////////////////////////////////

-// The name of the message or enum this field is referring to.  Must be found
-// at name resolution time (when upb_symtab_add() is called).
+// Returns the default value for this fielddef, which may either be something
+// the client set explicitly or the "default default" (0 for numbers, empty for
+// strings).  The field's type indicates the type of the returned value, except
+// for enum fields that are still mutable.
 //
-// NOTE: May only be called for fields whose type has already been set to
-// be a submessage, group, or enum!  Also, will be reset to empty if the
-// field's type is set again.
-bool upb_fielddef_settypename(upb_fielddef *f, const char *name);
-
-// The default value for the field.  For numeric types, use
+// For enums the default can be set either numerically or symbolically -- the
+// upb_fielddef_default_is_symbolic() function below will indicate which it is.
+// For string defaults, the value will be a upb_byteregion which is invalidated
+// by any other non-const call on this object.  Once the fielddef is finalized,
+// symbolic enum defaults are resolved, so finalized enum fielddefs always have
+// a default of type int32.
+INLINE upb_value upb_fielddef_default(const upb_fielddef *f) {
+  return f->defaultval;
+}
+// Sets default value for the field.  For numeric types, use
 // upb_fielddef_setdefault(), and "value" must match the type of the field.
-// For string/bytes types, use upb_fielddef_setdefaultstr().
-// Enum types may use either, since the default may be set either numerically
-// or symbolically.
+// For string/bytes types, use upb_fielddef_setdefaultstr().  Enum types may
+// use either, since the default may be set either numerically or symbolically.
 //
 // NOTE: May only be called for fields whose type has already been set.
 // Also, will be reset to default if the field's type is set again.
 void upb_fielddef_setdefault(upb_fielddef *f, upb_value value);
-void upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len);
+bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len);
 void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str);

-// A variety of tests about the type of a field.
-INLINE bool upb_issubmsgtype(upb_fieldtype_t type) {
-  return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE);
-}
-INLINE bool upb_isstringtype(upb_fieldtype_t type) {
-  return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES);
-}
-INLINE bool upb_isprimitivetype(upb_fieldtype_t type) {
-  return !upb_issubmsgtype(type) && !upb_isstringtype(type);
+// The results of this function are only meaningful for mutable enum fields,
+// which can have a default specified either as an integer or as a string.  If
+// this returns true, the default returned from upb_fielddef_default() is a
+// string, otherwise it is an integer.
+INLINE bool upb_fielddef_default_is_symbolic(const upb_fielddef *f) {
+  assert(f->type == UPB_TYPE(ENUM));  // Precondition: f is an enum field.
+  return f->default_is_string;
 }
-INLINE bool upb_issubmsg(const upb_fielddef *f) { return upb_issubmsgtype(f->type); }
-INLINE bool upb_isstring(const upb_fielddef *f) { return upb_isstringtype(f->type); }
-INLINE bool upb_isseq(const upb_fielddef *f) { return f->label == UPB_LABEL(REPEATED); }

-// Does the type of this field imply that it should contain an associated def?
+// Subdef. /////////////////////////////////////////////////////////////////////
+
+// Submessage and enum fields must reference a "subdef", which is the
+// upb_msgdef or upb_enumdef that defines their type.  Note that when the
+// fielddef is mutable it may not have a subdef *yet*, but this function still
+// returns true to indicate that the field's type requires a subdef.
 INLINE bool upb_hassubdef(const upb_fielddef *f) {
  return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM);
 }

+// Before a fielddef is finalized, its subdef may be set either directly (with
+// a upb_def*) or symbolically.  Symbolic refs must be resolved before the
+// containing msgdef can be finalized (see upb_resolve() above).  The client is
+// responsible for making sure that "subdef" lives until this fielddef is
+// finalized or deleted.
+//
+// Both methods require that upb_hassubdef(f) (so the type must be set prior
+// to calling these methods).  Returns false if this is not the case, or if
+// the given subdef is not of the correct type.  The subtype is reset if the
+// field's type is changed.
+bool upb_fielddef_setsubdef(upb_fielddef *f, upb_def *subdef);
+bool upb_fielddef_setsubtypename(upb_fielddef *f, const char *name);
+
+// Returns the enum or submessage def or symbolic name for this field, if any.
+// Requires that upb_hassubdef(f).  Returns NULL if the subdef has not been set
+// or if you ask for a subtype name when the subtype is currently set
+// symbolically (or vice-versa).  To access the subtype's name for a linked
+// fielddef, use upb_def_fullname(upb_fielddef_subdef(f)).
+//
+// Caller does *not* own a ref on the returned def or string.
+// upb_fielddef_subtypename() is non-const because finalized defs will never
+// have a symbolic reference (they must be resolved before the msgdef can be
+// finalized).
+upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f);
+const upb_def *upb_fielddef_subdef(const upb_fielddef *f);
+const char *upb_fielddef_subtypename(upb_fielddef *f);
+

 /* upb_msgdef *****************************************************************/

@ -232,31 +356,31 @@ typedef struct _upb_msgdef {
  upb_inttable itof;  // int to field
  upb_strtable ntof;  // name to field

-  // The following fields may be modified until finalized.
+  // The following fields may be modified while mutable.
  uint16_t size;
  uint8_t hasbit_bytes;
  // The range of tag numbers used to store extensions.
  uint32_t extstart, extend;
+  // Used for proto2 integration.
+  const void *prototype;
 } upb_msgdef;

-// Hash table entries for looking up fields by name or number.
-typedef struct {
-  bool junk;
-  upb_fielddef *f;
-} upb_itof_ent;
-typedef struct {
-  upb_fielddef *f;
-} upb_ntof_ent;
+// Returns NULL if memory allocation failed.
+upb_msgdef *upb_msgdef_new(void *owner);

-upb_msgdef *upb_msgdef_new(void);
-INLINE void upb_msgdef_unref(const upb_msgdef *md) { upb_def_unref(UPB_UPCAST(md)); }
-INLINE void upb_msgdef_ref(const upb_msgdef *md) { upb_def_ref(UPB_UPCAST(md)); }
+INLINE void upb_msgdef_unref(const upb_msgdef *md, void *owner) {
+  upb_def_unref(UPB_UPCAST(md), owner);  // Forwards to the generic def call.
+}
+INLINE void upb_msgdef_ref(const upb_msgdef *md, void *owner) {
+  upb_def_ref(UPB_UPCAST(md), owner);  // Forwards to the generic def call.
+}

 // Returns a new msgdef that is a copy of the given msgdef (and a copy of all
 // the fields) but with any references to submessages broken and replaced with
-// just the name of the submessage.  This can be put back into another symtab
-// and the names will be re-resolved in the new context.
-upb_msgdef *upb_msgdef_dup(const upb_msgdef *m);
+// just the name of the submessage.  Returns NULL if memory allocation failed.
+// This can be put back into another symtab and the names will be re-resolved
+// in the new context.
+upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, void *owner);

 // Read accessors.  May be called at any time.
 INLINE size_t upb_msgdef_size(const upb_msgdef *m) { return m->size; }
@ -271,38 +395,35 @@ void upb_msgdef_setsize(upb_msgdef *m, uint16_t size);
 void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes);
 bool upb_msgdef_setextrange(upb_msgdef *m, uint32_t start, uint32_t end);

-// Adds a set of fields (upb_fielddef objects) to a msgdef.  Caller retains its
-// ref on the fielddef.  May only be done before the msgdef is in a symtab
-// (requires upb_def_ismutable(m) for the msgdef).  The fielddef's name and
-// number must be set, and the message may not already contain any field with
-// this name or number, and this fielddef may not be part of another message,
-// otherwise false is returned and no action is performed.
-bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *f, int n);
-INLINE bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f) {
-  return upb_msgdef_addfields(m, &f, 1);
-}
-
-// Sets the layout of all fields according to default rules:
-// 1. Hasbits for required fields come first, then optional fields.
-// 2. Values are laid out in a way that respects alignment rules.
-// 3. The order is chosen to minimize memory usage.
-// This should only be called once all fielddefs have been added.
-// TODO: will likely want the ability to exclude strings/submessages/arrays.
-// TODO: will likely want the ability to define a header size.
-void upb_msgdef_layout(upb_msgdef *m);
+// Adds a set of fields (upb_fielddef objects) to a msgdef.  Requires that the
+// msgdef and all the fielddefs are mutable.  The fielddef's name and number
+// must be set, and the message may not already contain any field with this
+// name or number, and this fielddef may not be part of another message.  In
+// error cases false is returned and the msgdef is unchanged.
+//
+// On success, the msgdef takes a ref on the fielddef so the caller needn't
+// worry about continuing to keep it alive (however the reverse is not true;
+// refs on the fielddef will *not* keep the msgdef alive).  If ref_donor is
+// non-NULL, caller passes a ref on the fielddef from ref_donor to the msgdef,
+// otherwise caller retains its reference(s) on the defs in f.
+bool upb_msgdef_addfields(
+    upb_msgdef *m, upb_fielddef *const *f, int n, void *ref_donor);
+INLINE bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f,
+                                void *ref_donor) {
+  return upb_msgdef_addfields(m, &f, 1, ref_donor);  // One-element array.
+}

 // Looks up a field by name or number.  While these are written to be as fast
 // as possible, it will still be faster to cache the results of this lookup if
 // possible.  These return NULL if no such field is found.
 INLINE upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
-  upb_itof_ent *e = (upb_itof_ent*)
-      upb_inttable_fastlookup(&m->itof, i, sizeof(upb_itof_ent));
-  return e ? e->f : NULL;
+  const upb_value *val = upb_inttable_lookup32(&m->itof, i);
+  return val ? (upb_fielddef*)upb_value_getptr(*val) : NULL;
 }

 INLINE upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name) {
-  upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name);
-  return e ? e->f : NULL;
+  const upb_value *val = upb_strtable_lookup(&m->ntof, name);
+  return val ? (upb_fielddef*)upb_value_getptr(*val) : NULL;
 }

 INLINE int upb_msgdef_numfields(const upb_msgdef *m) {
@ -313,20 +434,19 @@ INLINE int upb_msgdef_numfields(const upb_msgdef *m) {
 // TODO: the iteration should be in field order.
 // Iterators are invalidated when a field is added or removed.
 //   upb_msg_iter i;
-//   for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
-//     upb_fielddef *f = upb_msg_iter_field(i);
+//   for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
+//     upb_fielddef *f = upb_msg_iter_field(&i);
 //     // ...
 //   }
 typedef upb_inttable_iter upb_msg_iter;

-upb_msg_iter upb_msg_begin(const upb_msgdef *m);
-upb_msg_iter upb_msg_next(const upb_msgdef *m, upb_msg_iter iter);
-INLINE bool upb_msg_done(upb_msg_iter iter) { return upb_inttable_done(iter); }
+void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m);
+void upb_msg_next(upb_msg_iter *iter);
+INLINE bool upb_msg_done(upb_msg_iter *iter) { return upb_inttable_done(iter); }

 // Iterator accessor.
-INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) {
-  upb_itof_ent *ent = (upb_itof_ent*)upb_inttable_iter_value(iter);
-  return ent->f;
+INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter *iter) {
+  return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
 }


@ -339,84 +459,75 @@ typedef struct _upb_enumdef {
  int32_t defaultval;
 } upb_enumdef;

-typedef struct {
-  uint32_t value;
-} upb_ntoi_ent;
-
-typedef struct {
-  bool junk;
-  char *str;
-} upb_iton_ent;
-
-upb_enumdef *upb_enumdef_new(void);
-INLINE void upb_enumdef_ref(const upb_enumdef *e) { upb_def_ref(UPB_UPCAST(e)); }
-INLINE void upb_enumdef_unref(const upb_enumdef *e) { upb_def_unref(UPB_UPCAST(e)); }
-upb_enumdef *upb_enumdef_dup(const upb_enumdef *e);
+// Returns NULL if memory allocation failed.
+upb_enumdef *upb_enumdef_new(void *owner);
+INLINE void upb_enumdef_ref(const upb_enumdef *e, void *owner) {
+  upb_def_ref(&e->base, owner);  // Forwards to the generic def call.
+}
+INLINE void upb_enumdef_unref(const upb_enumdef *e, void *owner) {
+  upb_def_unref(&e->base, owner);  // NOTE(review): msgdef uses UPB_UPCAST().
+}
+upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, void *owner);

-INLINE int32_t upb_enumdef_default(upb_enumdef *e) { return e->defaultval; }
+INLINE int32_t upb_enumdef_default(const upb_enumdef *e) {
+  return e->defaultval;
+}

 // May only be set if upb_def_ismutable(e).
 void upb_enumdef_setdefault(upb_enumdef *e, int32_t val);

-// Adds a value to the enumdef.  Requires that no existing val has this
-// name or number (returns false and does not add if there is).  May only
-// be called before the enumdef is in a symtab.
-bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num);
+// Returns the number of values currently defined in the enum.  Note that
+// multiple names can refer to the same number, so this may be greater than the
+// total number of unique numbers.
+INLINE int upb_enumdef_numvals(const upb_enumdef *e) {
+  return upb_strtable_count(&e->ntoi);
+}
+
+// Adds a value to the enumdef.  Requires that no existing val has this name,
+// but duplicate numbers are allowed.  May only be called if the enumdef is
+// mutable.  Returns false if the name is already in use, or if "name" is not a
+// valid label, or on memory allocation failure (we may want to distinguish
+// these failure cases in the future).
+bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num);

-// Lookups from name to integer and vice-versa.
-bool upb_enumdef_ntoil(upb_enumdef *e, const char *name, size_t len, int32_t *num);
-bool upb_enumdef_ntoi(upb_enumdef *e, const char *name, int32_t *num);
-// Caller does not own the returned string.
-const char *upb_enumdef_iton(upb_enumdef *e, int32_t num);
+// Looks up the integer value for a name, returning true if found.
+bool upb_enumdef_ntoi(const upb_enumdef *e, const char *name, int32_t *num);
+
+// Finds the name corresponding to the given number, or NULL if none was found.
+// If more than one name corresponds to this number, returns the first one that
+// was added.
+const char *upb_enumdef_iton(const upb_enumdef *e, int32_t num);

 // Iteration over name/value pairs.  The order is undefined.
 // Adding an enum val invalidates any iterators.
 //   upb_enum_iter i;
-//   for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
+//   for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) {
 //     // ...
 //   }
-typedef upb_inttable_iter upb_enum_iter;
+typedef upb_strtable_iter upb_enum_iter;

-upb_enum_iter upb_enum_begin(const upb_enumdef *e);
-upb_enum_iter upb_enum_next(const upb_enumdef *e, upb_enum_iter iter);
-INLINE bool upb_enum_done(upb_enum_iter iter) { return upb_inttable_done(iter); }
+void upb_enum_begin(upb_enum_iter *iter, const upb_enumdef *e);
+void upb_enum_next(upb_enum_iter *iter);
+bool upb_enum_done(upb_enum_iter *iter);

 // Iterator accessors.
-INLINE char *upb_enum_iter_name(upb_enum_iter iter) {
-  upb_iton_ent *e = (upb_iton_ent*)upb_inttable_iter_value(iter);
-  return e->str;
+INLINE const char *upb_enum_iter_name(upb_enum_iter *iter) {
+  return upb_strtable_iter_key(iter);
 }
-INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) {
-  return upb_inttable_iter_key(iter);
+INLINE int32_t upb_enum_iter_number(upb_enum_iter *iter) {
+  return upb_value_getint32(upb_strtable_iter_value(iter));
 }


-/* upb_deflist ****************************************************************/
-
-// upb_deflist is an internal-only dynamic array for storing a growing list of
-// upb_defs.
-typedef struct {
-  upb_def **defs;
-  uint32_t len;
-  uint32_t size;
-} upb_deflist;
-
-void upb_deflist_init(upb_deflist *l);
-void upb_deflist_uninit(upb_deflist *l);
-void upb_deflist_push(upb_deflist *l, upb_def *d);
-
-
 /* upb_symtab *****************************************************************/

-// A symtab (symbol table) is where upb_defs live.  It is empty when first
-// constructed.  Clients add definitions to the symtab (or replace existing
-// definitions) by calling upb_symtab_add().
-struct _upb_symtab {
-  upb_atomic_t refcount;
-  upb_rwlock_t lock;       // Protects all members except the refcount.
-  upb_strtable symtab;     // The symbol table.
-  upb_deflist olddefs;
-};
+// A symtab (symbol table) stores a name->def map of upb_defs.  Clients could
+// always create such tables themselves, but upb_symtab has logic for resolving
+// symbolic references, which is nontrivial.
+typedef struct {
+  uint32_t refcount;    // Plain (non-atomic) integer -- TODO confirm threading.
+  upb_strtable symtab;  // The name->def map.
+} upb_symtab;

 upb_symtab *upb_symtab_new(void);
 void upb_symtab_ref(const upb_symtab *s);
@ -430,33 +541,47 @@ void upb_symtab_unref(const upb_symtab *s);
 //    within this message are searched, then within the parent, on up to the
 //    root namespace).
 //
-// If a def is found, the caller owns one ref on the returned def.  Otherwise
-// returns NULL.
+// If a def is found, the caller owns one ref on the returned def, owned by
+// owner.  Otherwise returns NULL.
 const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
-                                  const char *sym);
+                                  const char *sym, void *owner);

-// Find an entry in the symbol table with this exact name.  If a def is found,
-// the caller owns one ref on the returned def.  Otherwise returns NULL.
-const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym);
-const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym);
+// Finds an entry in the symbol table with this exact name.  If a def is found,
+// the caller owns one ref on the returned def, owned by owner.  Otherwise
+// returns NULL.
+const upb_def *upb_symtab_lookup(
+    const upb_symtab *s, const char *sym, void *owner);
+const upb_msgdef *upb_symtab_lookupmsg(
+    const upb_symtab *s, const char *sym, void *owner);

 // Gets an array of pointers to all currently active defs in this symtab.  The
 // caller owns the returned array (which is of length *n) as well as a ref
-// to each symbol inside.  If type is UPB_DEF_ANY then defs of all types are
-// returned, otherwise only defs of the required type are returned.
-const upb_def **upb_symtab_getdefs(const upb_symtab *s, int *n, upb_deftype_t type);
-
-// Adds the given defs to the symtab, resolving all symbols.  Only one def per
-// name may be in the list, but defs can replace existing defs in the symtab.
+// to each symbol inside (owned by owner).  If type is UPB_DEF_ANY then defs of
+// all types are returned, otherwise only defs of the required type are
+// returned.
+const upb_def **upb_symtab_getdefs(
+    const upb_symtab *s, int *n, upb_deftype_t type, void *owner);
+
+// Adds the given defs to the symtab, resolving all symbols (including enum
+// default values) and finalizing the defs.  Only one def per name may be in
+// the list, but defs can replace existing defs in the symtab.  All defs must
+// have a name -- anonymous defs are not allowed.  Anonymous defs can still be
+// finalized by calling upb_def_finalize() directly.
+//
+// Any existing defs that can reach defs that are being replaced will
+// themselves be replaced also, so that the resulting set of defs is fully
+// consistent.
+//
+// The logic implemented in this method is a convenience; ultimately it calls
+// some combination of upb_fielddef_setsubdef(), upb_def_dup(), and
+// upb_finalize(), any of which the client could call themselves.  However,
+// since the logic for doing so is nontrivial, we provide it here.
+//
 // The entire operation either succeeds or fails.  If the operation fails, the
 // symtab is unchanged, false is returned, and status indicates the error.  The
-// caller retains its ref on all defs in all cases.
-bool upb_symtab_add(upb_symtab *s, upb_def **defs, int n, upb_status *status);
-
-// Frees defs that are no longer active in the symtab and are no longer
-// reachable.  Such defs are not freed when they are replaced in the symtab
-// if they are still reachable from defs that are still referenced.
-void upb_symtab_gc(upb_symtab *s);
+// caller passes a ref on all defs to the symtab (even if the operation fails).
+bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
+                    upb_status *status);


 /* upb_def casts **************************************************************/
@ -483,9 +608,9 @@ void upb_symtab_gc(upb_symtab *s);
    return (const struct _upb_ ## lower*)def; \
  }
 UPB_DEF_CASTS(msgdef, MSG);
+UPB_DEF_CASTS(fielddef, FIELD);
 UPB_DEF_CASTS(enumdef, ENUM);
 UPB_DEF_CASTS(svcdef, SERVICE);
-UPB_DEF_CASTS(unresolveddef, UNRESOLVED);
 #undef UPB_DEF_CASTS

 #ifdef __cplusplus
--- a/upb/descriptor/descriptor_const.h
+++ b/upb/descriptor/descriptor_const.h
@ -9,79 +9,47 @@ extern "C" {

 /* Enums. */

-typedef enum google_protobuf_FieldOptions_CType {
-  GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING = 0,
-  GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_CORD = 1,
-  GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING_PIECE = 2
-} google_protobuf_FieldOptions_CType;
-
 typedef enum google_protobuf_FieldDescriptorProto_Type {
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_DOUBLE = 1,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED64 = 6,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_STRING = 9,
  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FLOAT = 2,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT64 = 3,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT64 = 4,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_DOUBLE = 1,
  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT32 = 5,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED64 = 6,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED32 = 15,
  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED32 = 7,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BOOL = 8,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_STRING = 9,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_GROUP = 10,
  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_MESSAGE = 11,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BYTES = 12,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT64 = 3,
  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_ENUM = 14,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED32 = 15,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT64 = 4,
  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED64 = 16,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT32 = 17,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT64 = 18
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BYTES = 12,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT64 = 18,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BOOL = 8,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_GROUP = 10,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT32 = 17
 } google_protobuf_FieldDescriptorProto_Type;

 typedef enum google_protobuf_FieldDescriptorProto_Label {
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_OPTIONAL = 1,
  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REQUIRED = 2,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REPEATED = 3
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REPEATED = 3,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_OPTIONAL = 1
 } google_protobuf_FieldDescriptorProto_Label;

+typedef enum google_protobuf_FieldOptions_CType {
+  GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_CORD = 1,
+  GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING = 0,
+  GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING_PIECE = 2
+} google_protobuf_FieldOptions_CType;
+
 typedef enum google_protobuf_FileOptions_OptimizeMode {
-  GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_SPEED = 1,
  GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_CODE_SIZE = 2,
+  GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_SPEED = 1,
  GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_LITE_RUNTIME = 3
 } google_protobuf_FileOptions_OptimizeMode;

 /* Constants for field names and numbers. */

-#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNUM 1
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNAME "file"
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNUM 1
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNAME "name"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNUM 2
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNAME "field"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNUM 3
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNAME "nested_type"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNUM 4
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNUM 5
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNAME "extension_range"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNUM 6
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNAME "extension"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNUM 7
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
 #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDNUM 1
 #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDNAME "path"
 #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDTYPE 5
@ -106,6 +74,10 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
 #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDNAME "negative_int_value"
 #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDTYPE 3

+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNUM 8
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNAME "aggregate_value"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDTYPE 9
+
 #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDNUM 6
 #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDNAME "double_value"
 #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDTYPE 1
@ -114,10 +86,6 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
 #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDNAME "string_value"
 #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDTYPE 12

-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNUM 8
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNAME "aggregate_value"
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDTYPE 9
-
 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDNUM 1
 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDNAME "name"
 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDTYPE 9
@ -138,14 +106,6 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type"
 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11

-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNUM 6
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNAME "service"
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNUM 7
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNAME "extension"
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDTYPE 11
-
 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDNUM 8
 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
@ -154,6 +114,14 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDNAME "source_code_info"
 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDTYPE 11

+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNUM 6
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNAME "service"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNUM 7
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNAME "extension"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDTYPE 11
+
 #define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDNUM 1
 #define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDNAME "name"
 #define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDTYPE 9
@ -170,53 +138,13 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
 #define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
 #define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11

-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNUM 1
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNAME "name"
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNUM 2
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNAME "value"
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
 #define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
 #define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
 #define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11

-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNUM 1
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNAME "name"
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNUM 2
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNAME "number"
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE 5
-
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNUM 1
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNAME "name"
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNUM 2
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNAME "method"
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNUM 1
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNAME "name_part"
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNUM 2
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNAME "is_extension"
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDTYPE 8
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNUM 1
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNAME "file"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE 11

 #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDNUM 1
 #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDNAME "location"
@ -230,6 +158,18 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
 #define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDNAME "end"
 #define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDTYPE 5

+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNUM 2
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNAME "number"
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE 5
+
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
 #define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDNUM 1
 #define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDNAME "ctype"
 #define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDTYPE 14
@ -254,18 +194,6 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
 #define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDNAME "java_package"
 #define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDTYPE 9

-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNUM 8
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNAME "java_outer_classname"
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNUM 9
-#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNAME "optimize_for"
-#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDTYPE 14
-
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNUM 10
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNAME "java_multiple_files"
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDTYPE 8
-
 #define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDNUM 16
 #define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDNAME "cc_generic_services"
 #define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDTYPE 8
@ -286,17 +214,69 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
 #define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
 #define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11

-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNUM 1
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNAME "message_set_wire_format"
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDTYPE 8
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNUM 8
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNAME "java_outer_classname"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDTYPE 9

-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNUM 2
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNAME "no_standard_descriptor_accessor"
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDTYPE 8
+#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNUM 9
+#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNAME "optimize_for"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDTYPE 14

-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNUM 10
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNAME "java_multiple_files"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNUM 2
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNAME "value"
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNUM 2
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNAME "method"
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNUM 2
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNAME "field"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNUM 3
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNAME "nested_type"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNUM 4
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNUM 5
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNAME "extension_range"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNUM 6
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNAME "extension"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNUM 7
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDTYPE 11

 #define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
 #define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
@ -322,6 +302,10 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
 #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDNAME "type"
 #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDTYPE 14

+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNUM 8
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
 #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNUM 6
 #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNAME "type_name"
 #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDTYPE 9
@ -330,18 +314,34 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
 #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDNAME "default_value"
 #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDTYPE 9

-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNUM 8
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
 #define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
 #define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
 #define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11

+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNUM 1
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNAME "message_set_wire_format"
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNUM 2
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNAME "no_standard_descriptor_accessor"
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+
 #define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
 #define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
 #define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11

+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNUM 1
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNAME "name_part"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNUM 2
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNAME "is_extension"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDTYPE 8
+
 #ifdef __cplusplus
 }  /* extern "C" */
 #endif
--- a/upb/descriptor/reader.c
+++ b/upb/descriptor/reader.c
@ -8,13 +8,14 @@
 #include <stdlib.h>
 #include <errno.h>
 #include "upb/def.h"
-#include "upb/descriptor.h"
+#include "upb/descriptor/descriptor_const.h"
+#include "upb/descriptor/reader.h"

 // Returns a newly allocated string that joins input strings together, for example:
 //   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
 //   join("", "Baz") -> "Baz"
 // Caller owns a ref on the returned string. */
-static char *upb_join(char *base, char *name) {
+static char *upb_join(const char *base, const char *name) {
  if (!base || strlen(base) == 0) {
    return strdup(name);
  } else {
@ -27,6 +28,36 @@ static char *upb_join(char *base, char *name) {
  }
 }

+// Initializes l as an empty list with room for 8 defs.  The list starts out
+// "owned": until upb_deflist_donaterefs() is called, upb_deflist_uninit() will
+// release a ref on every def in the list, using the list's own address as the
+// owner token.  Allocation failure is not checked.
+void upb_deflist_init(upb_deflist *l) {
+  l->size = 8;
+  l->defs = malloc(l->size * sizeof(void*));
+  l->len = 0;
+  l->owned = true;
+}
+
+// Frees the list's array.  If the refs were never donated, each def is
+// unref'd first.
+void upb_deflist_uninit(upb_deflist *l) {
+  if (l->owned)
+    for(size_t i = 0; i < l->len; i++)
+      // The owner token is the list itself, not &l->defs: the two addresses
+      // only coincide while "defs" is the first member of upb_deflist.
+      upb_def_unref(l->defs[i], l);
+  free(l->defs);
+}
+
+// Appends d to the list, doubling the array's capacity when it is full.  No
+// ref is taken here.  Allocation failure is not checked.
+void upb_deflist_push(upb_deflist *l, upb_def *d) {
+  if(l->len == l->size) {
+    l->size *= 2;
+    l->defs = realloc(l->defs, l->size * sizeof(void*));
+  }
+  l->defs[l->len++] = d;
+}
+
+// Transfers the list's ref on every def to owner and marks the list as no
+// longer owning them, so upb_deflist_uninit() will not unref them.  The list
+// must still own its refs (asserted), so this may be called at most once.
+void upb_deflist_donaterefs(upb_deflist *l, void *owner) {
+  assert(l->owned);
+  for (size_t i = 0; i < l->len; i++)
+    upb_def_donateref(l->defs[i], l, owner);
+  l->owned = false;
+}
+
+
 /* upb_descreader  ************************************************************/

 static upb_def *upb_deflist_last(upb_deflist *l) {
@ -37,8 +68,8 @@ static upb_def *upb_deflist_last(upb_deflist *l) {
 static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
  for(uint32_t i = start; i < l->len; i++) {
    upb_def *def = l->defs[i];
-    char *name = def->fqname;
-    def->fqname = upb_join(str, name);
+    char *name = upb_join(str, upb_def_fullname(def));
+    upb_def_setfullname(def, name);
    free(name);
  }
 }
@ -66,9 +97,9 @@ void upb_descreader_uninit(upb_descreader *r) {
  }
 }

-upb_def **upb_descreader_getdefs(upb_descreader *r, int *n) {
+upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) {
  *n = r->defs.len;
-  r->defs.len = 0;
+  upb_deflist_donaterefs(&r->defs, owner);
  return r->defs.defs;
 }

@ -204,7 +235,7 @@ static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_r,
    return;
  }
  upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
-  if (upb_inttable_count(&e->iton) == 0) {
+  if (upb_enumdef_numvals(e) == 0) {
    // The default value of an enum (in the absence of an explicit default) is
    // its first listed value.
    upb_enumdef_setdefault(e, r->number);
@ -236,18 +267,18 @@ static upb_mhandlers *upb_enumdef_register_EnumValueDescriptorProto(
 // google.protobuf.EnumDescriptorProto.
 static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_r) {
  upb_descreader *r = _r;
-  upb_deflist_push(&r->defs, UPB_UPCAST(upb_enumdef_new()));
+  upb_deflist_push(&r->defs, UPB_UPCAST(upb_enumdef_new(&r->defs)));
  return UPB_CONTINUE;
 }

 static void upb_enumdef_EnumDescriptorProto_endmsg(void *_r, upb_status *status) {
  upb_descreader *r = _r;
  upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
-  if (upb_descreader_last((upb_descreader*)_r)->fqname == NULL) {
+  if (upb_def_fullname(upb_descreader_last((upb_descreader*)_r)) == NULL) {
    upb_status_seterrliteral(status, "Enum had no name.");
    return;
  }
-  if (upb_inttable_count(&e->iton) == 0) {
+  if (upb_enumdef_numvals(e) == 0) {
    upb_status_seterrliteral(status, "Enum had no values.");
    return;
  }
@ -258,9 +289,9 @@ static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_r,
                                                       upb_value val) {
  (void)fval;
  upb_descreader *r = _r;
-  upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
-  free(e->base.fqname);
-  e->base.fqname = upb_byteregion_strdup(upb_value_getbyteregion(val));
+  char *fullname = upb_byteregion_strdup(upb_value_getbyteregion(val));
+  upb_def_setfullname(upb_descreader_last(r), fullname);
+  free(fullname);
  return UPB_CONTINUE;
 }

@ -284,7 +315,7 @@ static upb_mhandlers *upb_enumdef_register_EnumDescriptorProto(upb_handlers *h)

 static upb_flow_t upb_fielddef_startmsg(void *_r) {
  upb_descreader *r = _r;
-  r->f = upb_fielddef_new();
+  r->f = upb_fielddef_new(&r->defs);
  free(r->default_string);
  r->default_string = NULL;
  return UPB_CONTINUE;
@ -370,13 +401,12 @@ static void upb_fielddef_endmsg(void *_r, upb_status *status) {
  upb_descreader *r = _r;
  upb_fielddef *f = r->f;
  // TODO: verify that all required fields were present.
-  assert(f->number != -1 && f->name != NULL);
-  assert((f->def != NULL) == upb_hassubdef(f));
+  assert(f->number != -1 && upb_fielddef_name(f) != NULL);
+  assert((upb_fielddef_subtypename(f) != NULL) == upb_hassubdef(f));

  // Field was successfully read, add it as a field of the msgdef.
  upb_msgdef *m = upb_descreader_top(r);
-  upb_msgdef_addfield(m, f);
-  upb_fielddef_unref(f);
+  upb_msgdef_addfield(m, f, &r->defs);
  r->f = NULL;

  if (r->default_string) {
@ -435,7 +465,7 @@ static upb_flow_t upb_fielddef_ontypename(void *_r, upb_value fval,
  (void)fval;
  upb_descreader *r = _r;
  char *name = upb_byteregion_strdup(upb_value_getbyteregion(val));
-  upb_fielddef_settypename(r->f, name);
+  upb_fielddef_setsubtypename(r->f, name);
  free(name);
  return UPB_CONTINUE;
 }
@ -479,7 +509,7 @@ static upb_mhandlers *upb_fielddef_register_FieldDescriptorProto(
 // google.protobuf.DescriptorProto.
 static upb_flow_t upb_msgdef_startmsg(void *_r) {
  upb_descreader *r = _r;
-  upb_deflist_push(&r->defs, UPB_UPCAST(upb_msgdef_new()));
+  upb_deflist_push(&r->defs, UPB_UPCAST(upb_msgdef_new(&r->defs)));
  upb_descreader_startcontainer(r);
  return UPB_CONTINUE;
 }
@ -487,7 +517,7 @@ static upb_flow_t upb_msgdef_startmsg(void *_r) {
 static void upb_msgdef_endmsg(void *_r, upb_status *status) {
  upb_descreader *r = _r;
  upb_msgdef *m = upb_descreader_top(r);
-  if(!m->base.fqname) {
+  if(!upb_def_fullname(UPB_UPCAST(m))) {
    upb_status_seterrliteral(status, "Encountered message with no name.");
    return;
  }
@ -497,11 +527,10 @@ static void upb_msgdef_endmsg(void *_r, upb_status *status) {
 static upb_flow_t upb_msgdef_onname(void *_r, upb_value fval, upb_value val) {
  (void)fval;
  upb_descreader *r = _r;
-  assert(val.type == UPB_TYPE(STRING));
  upb_msgdef *m = upb_descreader_top(r);
-  free(m->base.fqname);
-  m->base.fqname = upb_byteregion_strdup(upb_value_getbyteregion(val));
-  upb_descreader_setscopename(r, strdup(m->base.fqname));
+  char *name = upb_byteregion_strdup(upb_value_getbyteregion(val));
+  upb_def_setfullname(UPB_UPCAST(m), name);
+  upb_descreader_setscopename(r, name);  // Passes ownership of name.
  return UPB_CONTINUE;
 }

@ -530,4 +559,3 @@ static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h) {
 }
 #undef FNUM
 #undef FTYPE
-
--- a/upb/descriptor/reader.h
+++ b/upb/descriptor/reader.h
@ -4,9 +4,9 @@
 * Copyright (c) 2011 Google Inc.  See LICENSE for details.
 * Author: Josh Haberman <jhaberman@gmail.com>
 *
- * Routines for building defs by parsing descriptors in descriptor.proto format.
- * This only needs to use the public API of upb_symtab.  Later we may also
- * add routines for dumping a symtab to a descriptor.
+ * upb_descreader provides a set of sink handlers that will build defs from a
+ * data source that uses the descriptor.proto schema (like a protobuf binary
+ * descriptor).
 */

 #ifndef UPB_DESCRIPTOR_H
@ -18,6 +18,20 @@
 extern "C" {
 #endif

+/* upb_deflist ****************************************************************/
+
+// upb_deflist is an internal-only dynamic array for storing a growing list of
+// upb_defs; "owned" tracks whether the list still holds the refs on them.
+typedef struct {
+  upb_def **defs;  // Heap-allocated array of def pointers.
+  size_t len;      // Number of entries of defs currently in use.
+  size_t size;     // Allocated capacity of defs, in entries.
+  bool owned;      // True until the refs are given away by donaterefs.
+} upb_deflist;
+
+// Initializes an empty list; pair with upb_deflist_uninit().
+void upb_deflist_init(upb_deflist *l);
+// Frees the list, unref'ing its defs unless their refs were donated.
+void upb_deflist_uninit(upb_deflist *l);
+// Appends d to the list, growing the array as needed.
+void upb_deflist_push(upb_deflist *l, upb_def *d);
+// Hands the list's refs on all of its defs over to owner.
+void upb_deflist_donaterefs(upb_deflist *l, void *owner);

 /* upb_descreader  ************************************************************/

@ -56,11 +70,11 @@ void upb_descreader_uninit(upb_descreader *r);
 upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h);

 // Gets the array of defs that have been parsed and removes them from the
-// descreader.  Ownership of the defs is passed to the caller, but the
-// ownership of the returned array is retained and is invalidated by any other
-// call into the descreader.  The defs will not have been resolved, and are
-// ready to be added to a symtab.
-upb_def **upb_descreader_getdefs(upb_descreader *r, int *n);
+// descreader.  Ownership of the defs is passed to the caller (using the given
+// owner), but the ownership of the returned array is retained and is
+// invalidated by any other call into the descreader.  The defs will not have
+// been resolved, and are ready to be added to a symtab.
+upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n);

 #ifdef __cplusplus
 }  /* extern "C" */
--- a/upb/handlers.c
+++ b/upb/handlers.c
@ -13,7 +13,7 @@

 static upb_mhandlers *upb_mhandlers_new() {
  upb_mhandlers *m = malloc(sizeof(*m));
-  upb_inttable_init(&m->fieldtab, 8, sizeof(upb_itofhandlers_ent));
+  upb_inttable_init(&m->fieldtab);
  m->startmsg = NULL;
  m->endmsg = NULL;
  m->is_group = false;
@ -26,20 +26,19 @@ static upb_mhandlers *upb_mhandlers_new() {
 static upb_fhandlers *_upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
                                                  upb_fieldtype_t type,
                                                  bool repeated) {
-  upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, n);
+  const upb_value *v = upb_inttable_lookup(&m->fieldtab, n);
  // TODO: design/refine the API for changing the set of fields or modifying
  // existing handlers.
-  if (e) return NULL;
-  upb_fhandlers new_f = {type, repeated, UPB_ATOMIC_INIT(0),
+  if (v) return NULL;
+  upb_fhandlers new_f = {type, repeated, 0,
      n, -1, m, NULL, UPB_NO_VALUE, NULL, NULL, NULL, NULL, NULL,
 #ifdef UPB_USE_JIT_X64
      0, 0, 0,
 #endif
-      NULL};
+  };
  upb_fhandlers *ptr = malloc(sizeof(*ptr));
  memcpy(ptr, &new_f, sizeof(upb_fhandlers));
-  upb_itofhandlers_ent ent = {false, ptr};
-  upb_inttable_insert(&m->fieldtab, n, &ent);
+  upb_inttable_insert(&m->fieldtab, n, upb_value_ptr(ptr));
  return ptr;
 }

@ -64,12 +63,17 @@ upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n,
  return f;
 }

+// Looks up field number n in m's field table and returns its fhandlers, or
+// NULL when the table has no entry for that number.
+upb_fhandlers *upb_mhandlers_lookup(const upb_mhandlers *m, uint32_t n) {
+  const upb_value *ent = upb_inttable_lookup(&m->fieldtab, n);
+  if (!ent) return NULL;
+  return upb_value_getptr(*ent);
+}
+

 /* upb_handlers ***************************************************************/

 upb_handlers *upb_handlers_new() {
  upb_handlers *h = malloc(sizeof(*h));
-  upb_atomic_init(&h->refcount, 1);
+  h->refcount = 1;
  h->msgs_len = 0;
  h->msgs_size = 4;
  h->msgs = malloc(h->msgs_size * sizeof(*h->msgs));
@ -77,19 +81,18 @@ upb_handlers *upb_handlers_new() {
  return h;
 }

-void upb_handlers_ref(upb_handlers *h) { upb_atomic_ref(&h->refcount); }
+void upb_handlers_ref(upb_handlers *h) { h->refcount++; }

 void upb_handlers_unref(upb_handlers *h) {
-  if (upb_atomic_unref(&h->refcount)) {
+  if (--h->refcount == 0) {
    for (int i = 0; i < h->msgs_len; i++) {
      upb_mhandlers *mh = h->msgs[i];
-      for(upb_inttable_iter j = upb_inttable_begin(&mh->fieldtab);
-          !upb_inttable_done(j);
-          j = upb_inttable_next(&mh->fieldtab, j)) {
-        upb_itofhandlers_ent *e = upb_inttable_iter_value(j);
-        free(e->f);
+      upb_inttable_iter j;
+      upb_inttable_begin(&j, &mh->fieldtab);
+      for(; !upb_inttable_done(&j); upb_inttable_next(&j)) {
+        free(upb_value_getptr(upb_inttable_iter_value(&j)));
      }
-      upb_inttable_free(&mh->fieldtab);
+      upb_inttable_uninit(&mh->fieldtab);
 #ifdef UPB_USE_JIT_X64
      free(mh->tablearray);
 #endif
@ -110,31 +113,28 @@ upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h) {
  return mh;
 }

-typedef struct {
-  upb_mhandlers *mh;
-} upb_mtab_ent;
-
 static upb_mhandlers *upb_regmsg_dfs(upb_handlers *h, const upb_msgdef *m,
                                     upb_onmsgreg *msgreg_cb,
                                     upb_onfieldreg *fieldreg_cb,
                                     void *closure, upb_strtable *mtab) {
  upb_mhandlers *mh = upb_handlers_newmhandlers(h);
-  upb_mtab_ent e = {mh};
-  upb_strtable_insert(mtab, m->base.fqname, &e);
+  upb_strtable_insert(mtab, upb_def_fullname(UPB_UPCAST(m)), upb_value_ptr(mh));
  if (msgreg_cb) msgreg_cb(closure, mh, m);
  upb_msg_iter i;
-  for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
-    upb_fielddef *f = upb_msg_iter_field(i);
+  for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
+    upb_fielddef *f = upb_msg_iter_field(&i);
    upb_fhandlers *fh;
    if (upb_issubmsg(f)) {
      upb_mhandlers *sub_mh;
-      upb_mtab_ent *subm_ent;
+      const upb_value *subm_ent;
      // The table lookup is necessary to break the DFS for type cycles.
-      if ((subm_ent = upb_strtable_lookup(mtab, f->def->fqname)) != NULL) {
-        sub_mh = subm_ent->mh;
+      const char *subname = upb_def_fullname(upb_fielddef_subdef(f));
+      if ((subm_ent = upb_strtable_lookup(mtab, subname)) != NULL) {
+        sub_mh = upb_value_getptr(*subm_ent);
      } else {
-        sub_mh = upb_regmsg_dfs(h, upb_downcast_msgdef(f->def), msgreg_cb,
-                                fieldreg_cb, closure, mtab);
+        sub_mh = upb_regmsg_dfs(
+            h, upb_downcast_msgdef_const(upb_fielddef_subdef(f)),
+            msgreg_cb, fieldreg_cb, closure, mtab);
      }
      fh = upb_mhandlers_newfhandlers_subm(
          mh, f->number, f->type, upb_isseq(f), sub_mh);
@ -151,10 +151,10 @@ upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, const upb_msgdef *m,
                                      upb_onfieldreg *fieldreg_cb,
                                      void *closure) {
  upb_strtable mtab;
-  upb_strtable_init(&mtab, 8, sizeof(upb_mtab_ent));
+  upb_strtable_init(&mtab);
  upb_mhandlers *ret =
      upb_regmsg_dfs(h, m, msgreg_cb, fieldreg_cb, closure, &mtab);
-  upb_strtable_free(&mtab);
+  upb_strtable_uninit(&mtab);
  return ret;
 }

@ -212,6 +212,7 @@ upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,

  upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure);
  if (f->startseq) sflow = f->startseq(d->top->closure, f->fval);
+  _upb_dispatcher_sethas(d->top->closure, f->hasbit);
  if (sflow.flow != UPB_CONTINUE) {
    _upb_dispatcher_abortjmp(d);
  }
@ -247,6 +248,7 @@ upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,

  upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure);
  if (f->startsubmsg) sflow = f->startsubmsg(d->top->closure, f->fval);
+  _upb_dispatcher_sethas(d->top->closure, f->hasbit);
  if (sflow.flow != UPB_CONTINUE) {
    _upb_dispatcher_abortjmp(d);
  }
--- a/upb/handlers.h
+++ b/upb/handlers.h
@ -9,6 +9,10 @@
 * for each message and/or field as the data is being parsed or iterated over,
 * without having to know the source format that we are parsing from.  This
 * decouples the parsing logic from the processing logic.
+ *
+ * TODO: should we allow handlers to longjmp()?  Would be necessary to eg. let
+ * a Lua handler "yield" from the current coroutine.  I *think* everything
+ * would "just work" with our current decoder.
 */

 #ifndef UPB_HANDLERS_H
@ -141,9 +145,9 @@ struct _upb_mhandlers;
 typedef struct _upb_fieldent {
  upb_fieldtype_t type;
  bool repeated;
-  upb_atomic_t refcount;
+  uint32_t refcount;
  uint32_t number;
-  int32_t valuehasbit;
+  int32_t hasbit;
  struct _upb_mhandlers *msg;
  struct _upb_mhandlers *submsg;  // Set iff upb_issubmsgtype(type) == true.
  upb_value fval;
@ -157,14 +161,8 @@ typedef struct _upb_fieldent {
  uint32_t jit_pclabel_notypecheck;
  uint32_t jit_submsg_done_pclabel;
 #endif
-  void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f);
 } upb_fhandlers;

-typedef struct {
-  bool junk;  // Stolen by table impl; see table.h for details.
-  upb_fhandlers *f;
-} upb_itofhandlers_ent;
-
 // fhandlers are created as part of a upb_handlers instance, but can be ref'd
 // and unref'd to prolong the life of the handlers.
 void upb_fhandlers_ref(upb_fhandlers *m);
@ -174,6 +172,8 @@ void upb_fhandlers_unref(upb_fhandlers *m);
 #define UPB_FHANDLERS_ACCESSORS(name, type) \
  INLINE void upb_fhandlers_set ## name(upb_fhandlers *f, type v){f->name = v;} \
  INLINE type upb_fhandlers_get ## name(const upb_fhandlers *f) { return f->name; }
+// TODO(haberman): need a way of keeping the fval alive even if a plan outlasts
+// the handlers.
 UPB_FHANDLERS_ACCESSORS(fval, upb_value)
 UPB_FHANDLERS_ACCESSORS(value, upb_value_handler*)
 UPB_FHANDLERS_ACCESSORS(startsubmsg, upb_startfield_handler*)
@ -182,11 +182,13 @@ UPB_FHANDLERS_ACCESSORS(startseq, upb_startfield_handler*)
 UPB_FHANDLERS_ACCESSORS(endseq, upb_endfield_handler*)
 UPB_FHANDLERS_ACCESSORS(msg, struct _upb_mhandlers*)
 UPB_FHANDLERS_ACCESSORS(submsg, struct _upb_mhandlers*)
-// If set to >= 0, the hasbit will automatically be set after the corresponding
-// callback is called (when a JIT is enabled, this can be significantly more
-// efficient than setting the hasbit yourself inside the callback).  Could add
-// this for seq and submsg also, but doesn't look like a win at the moment.
-UPB_FHANDLERS_ACCESSORS(valuehasbit, int32_t)
+// If set to >= 0, the hasbit will automatically be set when the corresponding
+// field is parsed (when a JIT is enabled, this can be significantly more
+// efficient than setting the hasbit yourself inside the callback).  For values
+// it is undefined whether the hasbit is set before or after the callback is
+// called.  For seq and submsg, the hasbit is set *after* the start handler is
+// called, but before any of the handlers for the submsg or sequence.
+UPB_FHANDLERS_ACCESSORS(hasbit, int32_t)


 /* upb_mhandlers **************************************************************/
@ -195,7 +197,7 @@ UPB_FHANDLERS_ACCESSORS(valuehasbit, int32_t)
 // message in the graph of messages.

 typedef struct _upb_mhandlers {
-  upb_atomic_t refcount;
+  uint32_t refcount;
  upb_startmsg_handler *startmsg;
  upb_endmsg_handler *endmsg;
  upb_inttable fieldtab;  // Maps field number -> upb_fhandlers.
@ -203,6 +205,7 @@ typedef struct _upb_mhandlers {
 #ifdef UPB_USE_JIT_X64
  // Used inside the JIT to track labels (jmp targets) in the generated code.
  uint32_t jit_startmsg_pclabel;  // Starting a parse of this (sub-)message.
+  uint32_t jit_afterstartmsg_pclabel;  // After calling the startmsg handler.
  uint32_t jit_endofbuf_pclabel;  // ptr hitend, but delim_end or jit_end?
  uint32_t jit_endofmsg_pclabel;  // Done parsing this (sub-)message.
  uint32_t jit_dyndispatch_pclabel;  // Dispatch by table lookup.
@ -240,11 +243,14 @@ upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n,
 UPB_MHANDLERS_ACCESSORS(startmsg, upb_startmsg_handler*);
 UPB_MHANDLERS_ACCESSORS(endmsg, upb_endmsg_handler*);

+// Returns fhandlers for the given field, or NULL if none.
+upb_fhandlers *upb_mhandlers_lookup(const upb_mhandlers *m, uint32_t n);
+

 /* upb_handlers ***************************************************************/

 struct _upb_handlers {
-  upb_atomic_t refcount;
+  uint32_t refcount;
  upb_mhandlers **msgs;  // Array of msgdefs, [0]=toplevel.
  int msgs_len, msgs_size;
  bool should_jit;
@ -267,8 +273,10 @@ upb_mhandlers *upb_handlers_getmhandlers(upb_handlers *h, int index);
 // with "fieldreg_cb"
 //
 // See upb_handlers_reghandlerset() below for an example.
-typedef void upb_onmsgreg(void *closure, upb_mhandlers *mh, const upb_msgdef *m);
-typedef void upb_onfieldreg(void *closure, upb_fhandlers *mh, const upb_fielddef *m);
+typedef void upb_onmsgreg(
+    void *closure, upb_mhandlers *mh, const upb_msgdef *m);
+typedef void upb_onfieldreg(
+    void *closure, upb_fhandlers *fh, const upb_fielddef *f);
 upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, const upb_msgdef *m,
                                      upb_onmsgreg *msgreg_cb,
                                      upb_onfieldreg *fieldreg_cb,
@ -305,8 +313,8 @@ INLINE void upb_onfreg_hset(void *c, upb_fhandlers *fh, const upb_fielddef *f) {
  upb_value_setfielddef(&val, f);
  upb_fhandlers_setfval(fh, val);
 }
-INLINE upb_mhandlers *upb_handlers_reghandlerset(upb_handlers *h, const upb_msgdef *m,
-                                                 upb_handlerset *hs) {
+INLINE upb_mhandlers *upb_handlers_reghandlerset(
+    upb_handlers *h, const upb_msgdef *m, upb_handlerset *hs) {
  return upb_handlers_regmsgdef(h, m, &upb_onmreg_hset, &upb_onfreg_hset, hs);
 }

@ -373,7 +381,7 @@ INLINE void upb_dispatch_value(upb_dispatcher *d, upb_fhandlers *f,
                               upb_value val) {
  upb_flow_t flow = UPB_CONTINUE;
  if (f->value) flow = f->value(d->top->closure, f->fval, val);
-  _upb_dispatcher_sethas(d->top->closure, f->valuehasbit);
+  _upb_dispatcher_sethas(d->top->closure, f->hasbit);
  if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d);
 }
 void upb_dispatch_startmsg(upb_dispatcher *d);
@ -381,7 +389,8 @@ void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status);
 upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
                                               upb_fhandlers *f);
 upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d);
-upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, upb_fhandlers *f);
+upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
+                                            upb_fhandlers *f);
 upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d);

 #ifdef __cplusplus
--- a/upb/msg.c
+++ b/upb/msg.c
@ -4,101 +4,12 @@
 * Copyright (c) 2010 Google Inc.  See LICENSE for details.
 * Author: Josh Haberman <jhaberman@gmail.com>
 *
- * Data structure for storing a message of protobuf data.
 */

 #include "upb/upb.h"
 #include "upb/msg.h"

-void upb_msg_clear(void *msg, const upb_msgdef *md) {
-  assert(msg != NULL);
-  memset(msg, 0, md->hasbit_bytes);
-  // TODO: set primitive fields to defaults?
-}
-
-void *upb_stdarray_append(upb_stdarray *a, size_t type_size) {
-  assert(a != NULL);
-  assert(a->len <= a->size);
-  if (a->len == a->size) {
-    size_t old_size = a->size;
-    a->size = old_size == 0 ? 8 : (old_size * 2);
-    a->ptr = realloc(a->ptr, a->size * type_size);
-    memset(&a->ptr[old_size * type_size], 0, (a->size - old_size) * type_size);
-  }
-  return &a->ptr[a->len++ * type_size];
-}
-
-#if 0
-static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md,
-                                   upb_dispatcher *d);
-
-static upb_flow_t upb_msg_pushval(upb_value val, upb_fielddef *f,
-                                  upb_dispatcher *d, upb_fhandlers *hf) {
-  if (upb_issubmsg(f)) {
-    upb_msg *msg = upb_value_getmsg(val);
-    upb_dispatch_startsubmsg(d, hf);
-    upb_msg_dispatch(msg, upb_downcast_msgdef(f->def), d);
-    upb_dispatch_endsubmsg(d);
-  } else {
-    upb_dispatch_value(d, hf, val);
-  }
-  return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md,
-                                   upb_dispatcher *d) {
-  upb_msg_iter i;
-  for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) {
-    upb_fielddef *f = upb_msg_iter_field(i);
-    if (!upb_msg_has(msg, f)) continue;
-    upb_fhandlers *hf = upb_dispatcher_lookup(d, f->number);
-    if (!hf) continue;
-    upb_value val = upb_msg_get(msg, f);
-    if (upb_isarray(f)) {
-      upb_array *arr = upb_value_getarr(val);
-      for (uint32_t j = 0; j < upb_array_len(arr); ++j) {
-        upb_msg_pushval(upb_array_get(arr, f, j), f, d, hf);
-      }
-    } else {
-      upb_msg_pushval(val, f, d, hf);
-    }
-  }
-  return UPB_CONTINUE;
-}
-
-void upb_msg_runhandlers(upb_msg *msg, upb_msgdef *md, upb_handlers *h,
-                         void *closure, upb_status *status) {
-  upb_dispatcher d;
-  upb_dispatcher_init(&d, h, NULL, NULL, NULL);
-  upb_dispatcher_reset(&d, closure);
-
-  upb_dispatch_startmsg(&d);
-  upb_msg_dispatch(msg, md, &d);
-  upb_dispatch_endmsg(&d, status);
-
-  upb_dispatcher_uninit(&d);
-}
-#endif
-
-/* Standard writers. **********************************************************/
-
-void upb_stdmsg_sethas(void *_m, upb_value fval) {
-  assert(_m != NULL);
-  char *m = _m;
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  if (f->hasbit >= 0)
-    m[(uint32_t)f->hasbit / 8] |= (1 << ((uint32_t)f->hasbit % 8));
-}
-
-bool upb_stdmsg_has(const void *_m, upb_value fval) {
-  assert(_m != NULL);
-  const char *m = _m;
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  return f->hasbit < 0 ||
-      (m[(uint32_t)f->hasbit / 8] & (1 << ((uint32_t)f->hasbit % 8)));
-}
-
-#define UPB_ACCESSORS(type, ctype)                                            \
+#define UPB_ACCESSOR(type, ctype)                                             \
  upb_flow_t upb_stdmsg_set ## type (void *_m, upb_value fval,                \
                                     upb_value val) {                         \
    assert(_m != NULL);                                                       \
@ -108,230 +19,17 @@ bool upb_stdmsg_has(const void *_m, upb_value fval) {
    *(ctype*)&m[f->offset] = upb_value_get ## type(val);                      \
    return UPB_CONTINUE;                                                      \
  }                                                                           \
-                                                                              \
-  upb_flow_t upb_stdmsg_set ## type ## _r(void *a, upb_value _fval,           \
-                                          upb_value val) {                    \
-    (void)_fval;                                                              \
-    assert(a != NULL);                                                        \
-    ctype *p = upb_stdarray_append((upb_stdarray*)a, sizeof(ctype));          \
-    *p = upb_value_get ## type(val);                                          \
-    return UPB_CONTINUE;                                                      \
-  }                                                                           \
-                                                                              \
-  upb_value upb_stdmsg_get ## type(const void *_m, upb_value fval) {          \
-    assert(_m != NULL);                                                       \
-    const uint8_t *m = _m;                                                    \
-    const upb_fielddef *f = upb_value_getfielddef(fval);                      \
-    upb_value ret;                                                            \
-    upb_value_set ## type(&ret, *(ctype*)&m[f->offset]);                      \
-    return ret;                                                               \
-  }                                                                           \
-  upb_value upb_stdmsg_seqget ## type(const void *i) {                        \
-    assert(i != NULL);                                                        \
-    upb_value val;                                                            \
-    upb_value_set ## type(&val, *(ctype*)i);                                  \
-    return val;                                                               \
-  }

-UPB_ACCESSORS(double, double)
-UPB_ACCESSORS(float, float)
-UPB_ACCESSORS(int32, int32_t)
-UPB_ACCESSORS(int64, int64_t)
-UPB_ACCESSORS(uint32, uint32_t)
-UPB_ACCESSORS(uint64, uint64_t)
-UPB_ACCESSORS(bool, bool)
-UPB_ACCESSORS(ptr, void*)
+UPB_ACCESSOR(double, double)
+UPB_ACCESSOR(float, float)
+UPB_ACCESSOR(int32, int32_t)
+UPB_ACCESSOR(int64, int64_t)
+UPB_ACCESSOR(uint32, uint32_t)
+UPB_ACCESSOR(uint64, uint64_t)
+UPB_ACCESSOR(bool, bool)
+UPB_ACCESSOR(ptr, void*)
-#undef UPB_ACCESSORS
+#undef UPB_ACCESSOR  // Must name the macro defined above so it does not leak.

-static void _upb_stdmsg_setstr(void *_dst, upb_value src) {
-  upb_stdarray **dstp = _dst;
-  upb_stdarray *dst = *dstp;
-  if (!dst) {
-    dst = malloc(sizeof(*dst));
-    dst->size = 0;
-    dst->ptr = NULL;
-    *dstp = dst;
-  }
-  dst->len = 0;
-  const upb_byteregion *bytes = upb_value_getbyteregion(src);
-  uint32_t len = upb_byteregion_len(bytes);
-  if (len > dst->size) {
-    dst->size = len;
-    dst->ptr = realloc(dst->ptr, dst->size);
-  }
-  dst->len = len;
-  upb_byteregion_copyall(bytes, dst->ptr);
-}
-
-upb_flow_t upb_stdmsg_setstr(void *_m, upb_value fval, upb_value val) {
-  assert(_m != NULL);
-  char *m = _m;
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  // Hasbit automatically set by the handlers.
-  _upb_stdmsg_setstr(&m[f->offset], val);
-  return UPB_CONTINUE;
-}
-
-upb_flow_t upb_stdmsg_setstr_r(void *a, upb_value fval, upb_value val) {
-  assert(a != NULL);
-  (void)fval;
-  _upb_stdmsg_setstr(upb_stdarray_append((upb_stdarray*)a, sizeof(void*)), val);
-  return UPB_CONTINUE;
-}
-
-upb_value upb_stdmsg_getstr(const void *m, upb_value fval) {
-  assert(m != NULL);
-  return upb_stdmsg_getptr(m, fval);
-}
-
-upb_value upb_stdmsg_seqgetstr(const void *i) {
-  assert(i != NULL);
-  return upb_stdmsg_seqgetptr(i);
-}
-
-void *upb_stdmsg_new(const upb_msgdef *md) {
-  void *m = malloc(md->size);
-  memset(m, 0, md->size);
-  upb_msg_clear(m, md);
-  return m;
-}
-
-void upb_stdseq_free(void *s, upb_fielddef *f) {
-  upb_stdarray *a = s;
-  if (upb_issubmsg(f) || upb_isstring(f)) {
-    void **p = (void**)a->ptr;
-    for (uint32_t i = 0; i < a->size; i++) {
-      if (upb_issubmsg(f)) {
-        upb_stdmsg_free(p[i], upb_downcast_msgdef(f->def));
-      } else {
-        upb_stdarray *str = p[i];
-        free(str->ptr);
-        free(str);
-      }
-    }
-  }
-  free(a->ptr);
-  free(a);
-}
-
-void upb_stdmsg_free(void *m, const upb_msgdef *md) {
-  if (m == NULL) return;
-  upb_msg_iter i;
-  for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) {
-    upb_fielddef *f = upb_msg_iter_field(i);
-    if (!upb_isseq(f) && !upb_issubmsg(f) && !upb_isstring(f)) continue;
-    void *subp = upb_value_getptr(upb_stdmsg_getptr(m, f->fval));
-    if (subp == NULL) continue;
-    if (upb_isseq(f)) {
-      upb_stdseq_free(subp, f);
-    } else if (upb_issubmsg(f)) {
-      upb_stdmsg_free(subp, upb_downcast_msgdef(f->def));
-    } else {
-      upb_stdarray *str = subp;
-      free(str->ptr);
-      free(str);
-    }
-  }
-  free(m);
-}
-
-upb_sflow_t upb_stdmsg_startseq(void *_m, upb_value fval) {
-  char *m = _m;
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  upb_stdarray **arr = (void*)&m[f->offset];
-  if (!upb_stdmsg_has(_m, fval)) {
-    if (!*arr) {
-      *arr = malloc(sizeof(**arr));
-      (*arr)->size = 0;
-      (*arr)->ptr = NULL;
-    }
-    (*arr)->len = 0;
-    upb_stdmsg_sethas(m, fval);
-  }
-  return UPB_CONTINUE_WITH(*arr);
-}
-
-void upb_stdmsg_recycle(void **m, const upb_msgdef *md) {
-  if (*m)
-    upb_msg_clear(*m, md);
-  else
-    *m = upb_stdmsg_new(md);
-}
-
-upb_sflow_t upb_stdmsg_startsubmsg(void *_m, upb_value fval) {
-  assert(_m != NULL);
-  char *m = _m;
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  void **subm = (void*)&m[f->offset];
-  if (!upb_stdmsg_has(m, fval)) {
-    upb_stdmsg_recycle(subm, upb_downcast_msgdef(f->def));
-    upb_stdmsg_sethas(m, fval);
-  }
-  return UPB_CONTINUE_WITH(*subm);
-}
-
-upb_sflow_t upb_stdmsg_startsubmsg_r(void *a, upb_value fval) {
-  assert(a != NULL);
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  void **subm = upb_stdarray_append((upb_stdarray*)a, sizeof(void*));
-  upb_stdmsg_recycle(subm, upb_downcast_msgdef(f->def));
-  return UPB_CONTINUE_WITH(*subm);
-}
-
-const void *upb_stdmsg_seqbegin(const void *_a) {
-  const upb_stdarray *a = _a;
-  return a->len > 0 ? a->ptr : NULL;
-}
-
-#define NEXTFUNC(size) \
-  const void *upb_stdmsg_ ## size ## byte_seqnext(const void *_a, const void *iter) {\
-    const upb_stdarray *a = _a;                                          \
-    const void *next = (char*)iter + size;                               \
-    return (char*)next < (char*)a->ptr + (a->len * size) ? next : NULL;  \
-  }
-
-NEXTFUNC(8)
-NEXTFUNC(4)
-NEXTFUNC(1)
-
-#define STDMSG(type, size) { static upb_accessor_vtbl vtbl = { \
-    &upb_stdmsg_startsubmsg, \
-    &upb_stdmsg_set ## type, \
-    &upb_stdmsg_startseq, \
-    &upb_stdmsg_startsubmsg_r, \
-    &upb_stdmsg_set ## type ## _r, \
-    &upb_stdmsg_has, \
-    &upb_stdmsg_getptr, \
-    &upb_stdmsg_get ## type, \
-    &upb_stdmsg_seqbegin, \
-    &upb_stdmsg_ ## size ## byte_seqnext, \
-    &upb_stdmsg_seqget ## type}; \
-  return &vtbl; }
-
-upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f) {
-  switch (f->type) {
-    case UPB_TYPE(DOUBLE): STDMSG(double, 8)
-    case UPB_TYPE(FLOAT): STDMSG(float, 4)
-    case UPB_TYPE(UINT64):
-    case UPB_TYPE(FIXED64): STDMSG(uint64, 8)
-    case UPB_TYPE(INT64):
-    case UPB_TYPE(SFIXED64):
-    case UPB_TYPE(SINT64): STDMSG(int64, 8)
-    case UPB_TYPE(INT32):
-    case UPB_TYPE(SINT32):
-    case UPB_TYPE(ENUM):
-    case UPB_TYPE(SFIXED32): STDMSG(int32, 4)
-    case UPB_TYPE(UINT32):
-    case UPB_TYPE(FIXED32): STDMSG(uint32, 4)
-    case UPB_TYPE(BOOL): STDMSG(bool, 1)
-    case UPB_TYPE(STRING):
-    case UPB_TYPE(BYTES):
-    case UPB_TYPE(GROUP):
-    case UPB_TYPE(MESSAGE): STDMSG(str, 8)  // TODO: 32-bit
-  }
-  return NULL;
-}
-
 static void upb_accessors_onfreg(void *c, upb_fhandlers *fh,
                                 const upb_fielddef *f) {
  (void)c;
@ -344,7 +42,7 @@ static void upb_accessors_onfreg(void *c, upb_fhandlers *fh,
    } else {
      upb_fhandlers_setvalue(fh, f->accessor->set);
      upb_fhandlers_setstartsubmsg(fh, f->accessor->startsubmsg);
-      upb_fhandlers_setvaluehasbit(fh, f->hasbit);
+      upb_fhandlers_sethasbit(fh, f->hasbit);
    }
  }
 }
--- a/upb/msg.h
+++ b/upb/msg.h
@ -68,34 +68,18 @@ typedef struct _upb_accessor_vtbl {
  upb_seqget_handler     *seqget;
 } upb_accessor_vtbl;

-// Registers handlers for writing into a message of the given type.
+// Registers handlers for writing into a message of the given type using
+// whatever accessors it has defined.
 upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, const upb_msgdef *m);

-// Returns an stdmsg accessor for the given fielddef.
-upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f);
-
-
-/* upb_msg/upb_seq ************************************************************/
-
-// upb_msg and upb_seq allow for generic access to a message through its
-// accessor vtable.  Note that these do *not* allow you to create, destroy, or
-// take references on the objects -- these operations are specifically outside
-// the scope of what the accessors define.
-
-// Clears all hasbits.
-// TODO: Add a separate function for setting primitive values back to their
-// defaults (but not strings, submessages, or arrays).
-void upb_msg_clear(void *msg, const upb_msgdef *md);
-
 INLINE void upb_msg_clearbit(void *msg, const upb_fielddef *f) {
  ((char*)msg)[f->hasbit / 8] &= ~(1 << (f->hasbit % 8));
 }

-// Could add a method that recursively clears submessages, strings, and
-// arrays if desired.  This could be a win if you wanted to merge without
-// needing hasbits, because during parsing you would never clear submessages
-// or arrays.  Also this could be desired to provide proto2 operations on
-// generated messages.
+/* upb_msg/upb_seq ************************************************************/
+
+// These accessor functions are simply convenience methods for reading or
+// writing to a message through its accessors.

 INLINE bool upb_msg_has(const void *m, const upb_fielddef *f) {
  return f->accessor && f->accessor->has(m, f->fval);
@ -148,65 +132,11 @@ INLINE bool upb_msg_get_named(const void *m, const upb_msgdef *md,
  return true;
 }

-
-/* upb_msgvisitor *************************************************************/
-
-// A upb_msgvisitor reads data from an in-memory structure using its accessors,
-// pushing the results to a given set of upb_handlers.
-// TODO: not yet implemented.
-
-typedef struct {
-  upb_fhandlers *fh;
-  upb_fielddef *f;
-  uint16_t msgindex;  // Only when upb_issubmsg(f).
-} upb_msgvisitor_field;
-
-typedef struct {
-  upb_msgvisitor_field *fields;
-  int fields_len;
-} upb_msgvisitor_msg;
-
-typedef struct {
-  uint16_t msgindex;
-  uint16_t fieldindex;
-  uint32_t arrayindex;  // UINT32_MAX if not an array frame.
-} upb_msgvisitor_frame;
-
-typedef struct {
-  upb_msgvisitor_msg *messages;
-  int messages_len;
-  upb_dispatcher dispatcher;
-} upb_msgvisitor;
-
-// Initializes a msgvisitor that will push data from messages of the given
-// msgdef to the given set of handlers.
-void upb_msgvisitor_init(upb_msgvisitor *v, upb_msgdef *md, upb_handlers *h);
-void upb_msgvisitor_uninit(upb_msgvisitor *v);
-
-void upb_msgvisitor_reset(upb_msgvisitor *v, void *m);
-void upb_msgvisitor_visit(upb_msgvisitor *v, upb_status *status);
-
-
-/* Standard writers. **********************************************************/
-
-// Allocates a new stdmsg.
-void *upb_stdmsg_new(const upb_msgdef *md);
-
-// Recursively frees any strings or submessages that the message refers to.
-void upb_stdmsg_free(void *m, const upb_msgdef *md);
-
-void upb_stdmsg_sethas(void *_m, upb_value fval);
-
-// "hasbit" must be <= UPB_MAX_FIELDS.  If it is <0, this field has no hasbit.
-upb_value upb_stdmsg_packfval(int16_t hasbit, uint16_t value_offset);
-upb_value upb_stdmsg_packfval_subm(int16_t hasbit, uint16_t value_offset,
-                                   uint16_t subm_size, uint8_t subm_setbytes);
-
 // Value writers for every in-memory type: write the data to a known offset
-// from the closure "c" and set the hasbit (if any).
-// TODO: can we get away with having only one for int64, uint64, double, etc?
-// The main thing in the way atm is that the upb_value is strongly typed.
-// in debug mode.
+// from the closure "c."
+//
+// TODO(haberman): instead of having standard writer functions, should we have
+// a bool in the accessor that says "write raw value to the field's offset"?
 upb_flow_t upb_stdmsg_setint64(void *c, upb_value fval, upb_value val);
 upb_flow_t upb_stdmsg_setint32(void *c, upb_value fval, upb_value val);
 upb_flow_t upb_stdmsg_setuint64(void *c, upb_value fval, upb_value val);
@ -216,94 +146,6 @@ upb_flow_t upb_stdmsg_setfloat(void *c, upb_value fval, upb_value val);
 upb_flow_t upb_stdmsg_setbool(void *c, upb_value fval, upb_value val);
 upb_flow_t upb_stdmsg_setptr(void *c, upb_value fval, upb_value val);

-// Value writers for repeated fields: the closure points to a standard array
-// struct, appends the value to the end of the array, resizing with realloc()
-// if necessary.
-typedef struct {
-  char *ptr;
-  uint32_t len;   // Number of elements present.
-  uint32_t size;  // Number of elements allocated.
-} upb_stdarray;
-
-void *upb_stdarray_append(upb_stdarray *a, size_t type_size);
-
-upb_flow_t upb_stdmsg_setint64_r(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setint32_r(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setuint64_r(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setuint32_r(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setdouble_r(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setfloat_r(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setbool_r(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setptr_r(void *c, upb_value fval, upb_value val);
-
-// Writers for C strings (NULL-terminated): we can find a char* at a known
-// offset from the closure "c".  Calls realloc() on the pointer to allocate
-// the memory (TODO: investigate whether checking malloc_usable_size() would
-// be cheaper than realloc()).  Also sets the hasbit, if any.
-//
-// Since the string is NULL terminated and does not store an explicit length,
-// these are not suitable for binary data that can contain NULLs.
-upb_flow_t upb_stdmsg_setcstr(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setcstr_r(void *c, upb_value fval, upb_value val);
-
-// Writers for length-delimited strings: we explicitly store the length, so
-// the data can contain NULLs.  Stores the data using upb_stdarray
-// which is located at a known offset from the closure "c" (note that it
-// is included inline rather than pointed to).  Also sets the hasbit, if any.
-upb_flow_t upb_stdmsg_setstr(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setstr_r(void *c, upb_value fval, upb_value val);
-
-// Writers for startseq and startmsg which allocate (or reuse, if possible)
-// a sub data structure (upb_stdarray or a submessage, respectively),
-// setting the hasbit.  If the hasbit is already set, the existing data
-// structure is used verbatim.  If the hasbit is not already set, the pointer
-// is checked for NULL.  If it is NULL, a new substructure is allocated,
-// cleared, and used.  If it is not NULL, the existing substructure is
-// cleared and reused.
-//
-// If there is no hasbit, we always behave as if the hasbit was not set,
-// so any existing data for this array or submessage is cleared.  In most
-// cases this will be fine since each array or non-repeated submessage should
-// occur at most once in the stream.  But if the client is using "concatenation
-// as merging", it will want to make sure hasbits are allocated so merges can
-// happen appropriately.
-//
-// If there was a demand for the behavior that absence of a hasbit acts as if
-// the bit was always set, we could provide that also.  But Clear() would need
-// to act recursively, which is less efficient since it requires an extra pass
-// over the tree.
-upb_sflow_t upb_stdmsg_startseq(void *c, upb_value fval);
-upb_sflow_t upb_stdmsg_startsubmsg(void *c, upb_value fval);
-upb_sflow_t upb_stdmsg_startsubmsg_r(void *c, upb_value fval);
-
-
-/* Standard readers. **********************************************************/
-
-bool upb_stdmsg_has(const void *c, upb_value fval);
-const void *upb_stdmsg_seqbegin(const void *c);
-
-upb_value upb_stdmsg_getint64(const void *c, upb_value fval);
-upb_value upb_stdmsg_getint32(const void *c, upb_value fval);
-upb_value upb_stdmsg_getuint64(const void *c, upb_value fval);
-upb_value upb_stdmsg_getuint32(const void *c, upb_value fval);
-upb_value upb_stdmsg_getdouble(const void *c, upb_value fval);
-upb_value upb_stdmsg_getfloat(const void *c, upb_value fval);
-upb_value upb_stdmsg_getbool(const void *c, upb_value fval);
-upb_value upb_stdmsg_getptr(const void *c, upb_value fval);
-
-const void *upb_stdmsg_8byte_seqnext(const void *c, const void *iter);
-const void *upb_stdmsg_4byte_seqnext(const void *c, const void *iter);
-const void *upb_stdmsg_1byte_seqnext(const void *c, const void *iter);
-
-upb_value upb_stdmsg_seqgetint64(const void *c);
-upb_value upb_stdmsg_seqgetint32(const void *c);
-upb_value upb_stdmsg_seqgetuint64(const void *c);
-upb_value upb_stdmsg_seqgetuint32(const void *c);
-upb_value upb_stdmsg_seqgetdouble(const void *c);
-upb_value upb_stdmsg_seqgetfloat(const void *c);
-upb_value upb_stdmsg_seqgetbool(const void *c);
-upb_value upb_stdmsg_seqgetptr(const void *c);
-
 #ifdef __cplusplus
 }  /* extern "C" */
 #endif
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@ -13,6 +13,33 @@
 #include "upb/pb/decoder.h"
 #include "upb/pb/varint.h"

+typedef struct {
+  uint8_t native_wire_type;
+  bool is_numeric;
+} upb_decoder_typeinfo;
+
+static const upb_decoder_typeinfo upb_decoder_types[] = {
+  {UPB_WIRE_TYPE_END_GROUP,   false},  // ENDGROUP
+  {UPB_WIRE_TYPE_64BIT,       true},   // DOUBLE
+  {UPB_WIRE_TYPE_32BIT,       true},   // FLOAT
+  {UPB_WIRE_TYPE_VARINT,      true},   // INT64
+  {UPB_WIRE_TYPE_VARINT,      true},   // UINT64
+  {UPB_WIRE_TYPE_VARINT,      true},   // INT32
+  {UPB_WIRE_TYPE_64BIT,       true},   // FIXED64
+  {UPB_WIRE_TYPE_32BIT,       true},   // FIXED32
+  {UPB_WIRE_TYPE_VARINT,      true},   // BOOL
+  {UPB_WIRE_TYPE_DELIMITED,   false},  // STRING
+  {UPB_WIRE_TYPE_START_GROUP, false},  // GROUP
+  {UPB_WIRE_TYPE_DELIMITED,   false},  // MESSAGE
+  {UPB_WIRE_TYPE_DELIMITED,   false},  // BYTES
+  {UPB_WIRE_TYPE_VARINT,      true},   // UINT32
+  {UPB_WIRE_TYPE_VARINT,      true},   // ENUM
+  {UPB_WIRE_TYPE_32BIT,       true},   // SFIXED32
+  {UPB_WIRE_TYPE_64BIT,       true},   // SFIXED64
+  {UPB_WIRE_TYPE_VARINT,      true},   // SINT32
+  {UPB_WIRE_TYPE_VARINT,      true},   // SINT64
+};
+
 /* upb_decoderplan ************************************************************/

 #ifdef UPB_USE_JIT_X64
@ -32,37 +59,6 @@
 #include "upb/pb/decoder_x64.h"
 #endif

-typedef struct {
-  upb_fhandlers base;
-  void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f);
-#ifdef UPB_USE_JIT_X64
-  uint32_t jit_pclabel;
-  uint32_t jit_pclabel_notypecheck;
-#endif
-} upb_dplanfield;
-
-typedef struct {
-  upb_mhandlers base;
-#ifdef UPB_USE_JIT_X64
-  uint32_t jit_startmsg_pclabel;
-  uint32_t jit_endofbuf_pclabel;
-  uint32_t jit_endofmsg_pclabel;
-  uint32_t jit_dyndispatch_pclabel;
-  uint32_t jit_unknownfield_pclabel;
-  int32_t jit_parent_field_done_pclabel;
-  uint32_t max_field_number;
-  // Currently keyed on field number.  Could also try keying it
-  // on encoded or decoded tag, or on encoded field number.
-  void **tablearray;
-#endif
-} upb_dplanmsg;
-
-static void *upb_decoderplan_fptrs[];
-
-void upb_decoderplan_initfhandlers(upb_fhandlers *f) {
-  f->decode = upb_decoderplan_fptrs[f->type];
-}
-
 upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit) {
  upb_decoderplan *p = malloc(sizeof(*p));
  p->handlers = h;
@ -72,17 +68,6 @@ upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit) {
  p->jit_code = NULL;
  if (allowjit) upb_decoderplan_makejit(p);
 #endif
-  // Set function pointers for each field's decode function.
-  for (int i = 0; i < h->msgs_len; i++) {
-    upb_mhandlers *m = h->msgs[i];
-    for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab);
-        !upb_inttable_done(i);
-        i = upb_inttable_next(&m->fieldtab, i)) {
-      upb_itofhandlers_ent *e = upb_inttable_iter_value(i);
-      upb_fhandlers *f = e->f;
-      upb_decoderplan_initfhandlers(f);
-    }
-  }
  return p;
 }

@ -396,14 +381,6 @@ static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) {
  upb_push_msg(d, f, upb_decoder_offset(d) + len);
 }

-#define F(type) &upb_decode_ ## type
-static void *upb_decoderplan_fptrs[] = {
-    &upb_endgroup, F(DOUBLE), F(FLOAT), F(INT64),
-    F(UINT64), F(INT32), F(FIXED64), F(FIXED32), F(BOOL), F(STRING),
-    F(GROUP), F(MESSAGE), F(STRING), F(UINT32), F(ENUM), F(SFIXED32),
-    F(SFIXED64), F(SINT32), F(SINT64)};
-#undef F
-

 /* The main decoding loop *****************************************************/

@ -431,16 +408,18 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
    if (!upb_trydecode_varint32(d, &tag)) return NULL;
    uint8_t wire_type = tag & 0x7;
    uint32_t fieldnum = tag >> 3;
-    upb_itofhandlers_ent *e = upb_inttable_fastlookup(
-        d->dispatch_table, fieldnum, sizeof(upb_itofhandlers_ent));
-    upb_fhandlers *f = e ? e->f : NULL;
+    const upb_value *val = upb_inttable_lookup32(d->dispatch_table, fieldnum);
+    upb_fhandlers *f = val ? upb_value_getptr(*val) : NULL;
+    bool is_packed = false;

    if (f) {
      // Wire type check.
-      if (wire_type == upb_types[f->type].native_wire_type ||
-          (wire_type == UPB_WIRE_TYPE_DELIMITED &&
-           upb_types[f->type].is_numeric)) {
+      if (wire_type == upb_decoder_types[f->type].native_wire_type) {
        // Wire type is ok.
+      } else if ((wire_type == UPB_WIRE_TYPE_DELIMITED &&
+                 upb_decoder_types[f->type].is_numeric)) {
+        // Wire type is ok (and packed).
+        is_packed = true;
      } else {
        f = NULL;
      }
@ -453,19 +432,18 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
    if (fr->is_sequence && fr->f != f) {
      upb_dispatch_endseq(&d->dispatcher);
      upb_decoder_setmsgend(d);
+      fr = d->dispatcher.top;
    }
-    if (f && f->repeated && (!fr->is_sequence || fr->f != f)) {
-      uint64_t old_end = d->dispatcher.top->end_ofs;
-      upb_dispatcher_frame *fr = upb_dispatch_startseq(&d->dispatcher, f);
-      if (wire_type != UPB_WIRE_TYPE_DELIMITED ||
-          upb_issubmsgtype(f->type) || upb_isstringtype(f->type)) {
-        // Non-packed field -- this tag pertains to only a single message.
-        fr->end_ofs = old_end;
-      } else {
+    if (f && f->repeated && !fr->is_sequence) {
+      upb_dispatcher_frame *fr2 = upb_dispatch_startseq(&d->dispatcher, f);
+      if (is_packed) {
        // Packed primitive field.
        uint32_t len = upb_decode_varint32(d);
-        fr->end_ofs = upb_decoder_offset(d) + len;
-        fr->is_packed = true;
+        fr2->end_ofs = upb_decoder_offset(d) + len;
+        fr2->is_packed = true;
+      } else {
+        // Non-packed field -- this tag pertains to only a single message.
+        fr2->end_ofs = fr->end_ofs;
      }
      upb_decoder_setmsgend(d);
    }
@ -513,13 +491,37 @@ upb_success_t upb_decoder_decode(upb_decoder *d) {
    if (!d->top_is_packed) f = upb_decode_tag(d);
    if (!f) {
-      // Sucessful EOF.  We may need to dispatch a top-level implicit frame.
+      // Successful EOF.  We may need to dispatch a top-level implicit frame.
-      if (d->dispatcher.top == d->dispatcher.stack + 1) {
-        assert(d->dispatcher.top->is_sequence);
+      if (d->dispatcher.top->is_sequence) {
+        assert(d->dispatcher.top == d->dispatcher.stack + 1);
        upb_dispatch_endseq(&d->dispatcher);
      }
+      assert(d->dispatcher.top == d->dispatcher.stack);
+      upb_dispatch_endmsg(&d->dispatcher, &d->status);
      return UPB_OK;
    }
-    f->decode(d, f);
+
+    switch (f->type) {
+      case UPB_TYPE_ENDGROUP:  upb_endgroup(d, f);        break;
+      case UPB_TYPE(DOUBLE):   upb_decode_DOUBLE(d, f);   break;
+      case UPB_TYPE(FLOAT):    upb_decode_FLOAT(d, f);    break;
+      case UPB_TYPE(INT64):    upb_decode_INT64(d, f);    break;
+      case UPB_TYPE(UINT64):   upb_decode_UINT64(d, f);   break;
+      case UPB_TYPE(INT32):    upb_decode_INT32(d, f);    break;
+      case UPB_TYPE(FIXED64):  upb_decode_FIXED64(d, f);  break;
+      case UPB_TYPE(FIXED32):  upb_decode_FIXED32(d, f);  break;
+      case UPB_TYPE(BOOL):     upb_decode_BOOL(d, f);     break;
+      case UPB_TYPE(STRING):
+      case UPB_TYPE(BYTES):    upb_decode_STRING(d, f);   break;
+      case UPB_TYPE(GROUP):    upb_decode_GROUP(d, f);    break;
+      case UPB_TYPE(MESSAGE):  upb_decode_MESSAGE(d, f);  break;
+      case UPB_TYPE(UINT32):   upb_decode_UINT32(d, f);   break;
+      case UPB_TYPE(ENUM):     upb_decode_ENUM(d, f);     break;
+      case UPB_TYPE(SFIXED32): upb_decode_SFIXED32(d, f); break;
+      case UPB_TYPE(SFIXED64): upb_decode_SFIXED64(d, f); break;
+      case UPB_TYPE(SINT32):   upb_decode_SINT32(d, f);   break;
+      case UPB_TYPE(SINT64):   upb_decode_SINT64(d, f);   break;
+      case UPB_TYPE_NONE: assert(false); break;
+    }
    upb_decoder_checkpoint(d);
  }
 }
@ -542,7 +544,6 @@ void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p, int msg_offset) {
 void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input,
                            void *closure) {
  assert(d->plan);
-  assert(upb_byteregion_discardofs(input) == upb_byteregion_startofs(input));
  upb_dispatcher_frame *f =
      upb_dispatcher_reset(&d->dispatcher, closure, d->plan->handlers->msgs[0]);
  upb_status_clear(&d->status);
--- a/upb/pb/decoder_x64.dasc
+++ b/upb/pb/decoder_x64.dasc
@ -9,8 +9,8 @@
 |// parsing the specific message and calling specific handlers.
 |//
 |// Since the JIT can call other functions (the JIT'ted code is not a leaf
-|// function) we must respect alignment rules.  On OS X, this means aligning
-|// the stack to 16 bytes.
+|// function) we must respect alignment rules.  All x86-64 systems require
+|// 16-byte stack alignment.

 #include <sys/mman.h>
 #include "dynasm/dasm_x86.h"
@ -103,7 +103,7 @@ void upb_reg_jit_gdb(upb_decoderplan *plan) {
 // Has to be a separate function, otherwise GCC will complain about
 // expressions like (&foo != NULL) because they will never evaluate
 // to false.
-static void upb_assert_notnull(void *addr) { assert(addr != NULL); }
+static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; }

 |.arch x64
 |.actionlist upb_jit_actionlist
@ -401,45 +401,10 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan,
  }
 }

-#if 0
-// These appear not to speed things up, but keeping around for
-// further experimentation.
-static void upb_decoderplan_jit_doappend(upb_decoderplan *plan, uint8_t size,
-                                         upb_fhandlers *f) {
-  |  mov   eax, STDARRAY:ARG1_64->len
-  |  cmp   eax, STDARRAY:ARG1_64->size
-  |  jne   >2
-  // If array is full, fall back to actual function.
-  |  loadfval f
-  |  callp  f->value
-  |  jmp   >3
-  |2:
-  |  mov   rcx, STDARRAY:ARG1_64->ptr
-  |  mov   esi, eax
-  |  add   eax, 1
-
-  switch (size) {
-    case 8:
-      |  mov   [rcx + rsi * 8], ARG3_64
-      break;
-
-    case 4:
-      |  mov   [rcx + rsi * 4], ARG3_32
-      break;
-
-    case 1:
-      |  mov   [rcx + rsi * 4], ARG3_8
-      break;
-  }
-
-  |  mov   STDARRAY:ARG1_64->len, eax
-  |3:
-}
-#endif
-
 static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
                                       upb_fhandlers *f) {
-  // Call callbacks.
+  // Call callbacks.  Specializing the append accessors didn't yield a speed
+  // increase in benchmarks.
  if (upb_issubmsgtype(f->type)) {
    if (f->type == UPB_TYPE(MESSAGE)) {
      |   mov   rsi, PTR
@ -457,7 +422,10 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
      |  mov   ARG1_64, CLOSURE
      |  loadfval f
      |  callp f->startsubmsg
+      |  sethas CLOSURE, f->hasbit
      |  mov  CLOSURE, rdx
+    } else {
+      |  sethas CLOSURE, f->hasbit
    }
    |  mov   qword FRAME->closure, CLOSURE
    // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
@ -465,6 +433,7 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,

    const upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f);
    |  call  =>sub_m->jit_startmsg_pclabel;
+    |  popframe upb_fhandlers_getmsg(f)

    // Call endsubmsg handler (if any).
    if (f->endsubmsg) {
@ -473,7 +442,6 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
      |  loadfval  f
      |  callp f->endsubmsg
    }
-    |   popframe upb_fhandlers_getmsg(f)
    // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
    |  mov   DECODER->ptr, PTR
  } else {
@ -494,21 +462,6 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
    } else if (f->value == &upb_stdmsg_setbool) {
      const upb_fielddef *fd = upb_value_getfielddef(f->fval);
      |  mov   [ARG1_64 + fd->offset], ARG3_8
-#if 0
-    // These appear not to speed things up, but keeping around for
-    // further experimentation.
-    } else if (f->value == &upb_stdmsg_setint64_r ||
-        f->value == &upb_stdmsg_setuint64_r ||
-        f->value == &upb_stdmsg_setptr_r ||
-        f->value == &upb_stdmsg_setdouble_r) {
-      upb_decoderplan_jit_doappend(plan, 8, f);
-    } else if (f->value == &upb_stdmsg_setint32_r ||
-               f->value == &upb_stdmsg_setuint32_r ||
-               f->value == &upb_stdmsg_setfloat_r) {
-      upb_decoderplan_jit_doappend(plan, 4, f);
-    } else if (f->value == &upb_stdmsg_setbool_r) {
-      upb_decoderplan_jit_doappend(plan, 1, f);
-#endif
    } else if (f->value) {
      // Load closure and fval into arg registers.
      ||#ifndef NDEBUG
@ -520,16 +473,26 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
      |  loadfval f
      |  callp  f->value
    }
-    |  sethas CLOSURE, f->valuehasbit
+    |  sethas CLOSURE, f->hasbit
    // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
    |  mov   DECODER->ptr, PTR
  }
 }

+// Builds the wire tag for "f" -- the field number shifted left by three, OR'd
+// with the native wire type of the field's type -- and returns it as encoded
+// by upb_vencode32().
+static uint64_t upb_get_encoded_tag(upb_fhandlers *f) {
+  uint32_t wire_type = upb_decoder_types[f->type].native_wire_type;
+  uint32_t raw_tag = (f->number << 3) | wire_type;
+  uint64_t packed = upb_vencode32(raw_tag);
+  // Five bytes (40 bits) is the most any encoded tag may occupy.
+  assert(packed <= 0xffffffffff);
+  return packed;
+}
+
 // PTR should point to the beginning of the tag.
-static void upb_decoderplan_jit_field(upb_decoderplan *plan, uint64_t tag,
-                                      uint64_t next_tag, upb_mhandlers *m,
+static void upb_decoderplan_jit_field(upb_decoderplan *plan, upb_mhandlers *m,
                                      upb_fhandlers *f, upb_fhandlers *next_f) {
+  uint64_t tag = upb_get_encoded_tag(f);
+  uint64_t next_tag = next_f ? upb_get_encoded_tag(next_f) : 0;
+
  // PC-label for the dispatch table.
  // We check the wire type (which must be loaded in edx) because the
  // table is keyed on field number, not type.
@ -541,10 +504,13 @@ static void upb_decoderplan_jit_field(upb_decoderplan *plan, uint64_t tag,
    |  mov   rsi, FRAME->end_ofs
    |  pushframe  f, rsi, true
    if (f->startseq) {
-      |  mov   ARG1_64, CLOSURE
+      |  mov    ARG1_64, CLOSURE
      |  loadfval f
-      |  callp f->startseq
-      |  mov   CLOSURE, rdx
+      |  callp  f->startseq
+      |  sethas CLOSURE, f->hasbit
+      |  mov    CLOSURE, rdx
+    } else {
+      |  sethas CLOSURE, f->hasbit
    }
    |  mov   qword FRAME->closure, CLOSURE
  }
@ -590,6 +556,11 @@ static int upb_compare_uint32(const void *a, const void *b) {
 }

 static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) {
+  |=>m->jit_afterstartmsg_pclabel:
+  // There was a call to get here, so we need to align the stack.
+  |  sub  rsp, 8
+  |  jmp  >1
+
  |=>m->jit_startmsg_pclabel:
  // There was a call to get here, so we need to align the stack.
  |  sub  rsp, 8
@ -602,6 +573,7 @@ static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) {
    // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
  }

+  |1:
  |  setmsgend  m
  |  check_eob   m
  |  mov    ecx, dword [PTR]
@ -616,30 +588,19 @@ static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) {
  int num_keys = upb_inttable_count(&m->fieldtab);
  uint32_t *keys = malloc(num_keys * sizeof(*keys));
  int idx = 0;
-  for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab);
-      !upb_inttable_done(i);
-      i = upb_inttable_next(&m->fieldtab, i)) {
-    keys[idx++] = upb_inttable_iter_key(i);
+  upb_inttable_iter i;
+  upb_inttable_begin(&i, &m->fieldtab);
+  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+    keys[idx++] = upb_inttable_iter_key(&i);
  }
  qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32);

-  upb_fhandlers *last_f = NULL;
-  uint64_t last_encoded_tag = 0;
  for(int i = 0; i < num_keys; i++) {
-    uint32_t fieldnum = keys[i];
-    upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, fieldnum);
-    upb_fhandlers *f = e->f;
-    assert(f->number == fieldnum);
-    uint32_t tag = (f->number << 3) | upb_types[f->type].native_wire_type;
-    uint64_t encoded_tag = upb_vencode32(tag);
-    // No tag should be greater than 5 bytes.
-    assert(encoded_tag <= 0xffffffffff);
-    if (last_f) upb_decoderplan_jit_field(
-        plan, last_encoded_tag, encoded_tag, m, last_f, f);
-    last_encoded_tag = encoded_tag;
-    last_f = f;
+    upb_fhandlers *f = upb_mhandlers_lookup(m, keys[i]);
+    upb_fhandlers *next_f =
+        (i + 1 < num_keys) ? upb_mhandlers_lookup(m, keys[i + 1]) : NULL;
+    upb_decoderplan_jit_field(plan, m, f, next_f);
  }
-  upb_decoderplan_jit_field(plan, last_encoded_tag, 0, m, last_f, NULL);

  free(keys);

@ -733,18 +694,19 @@ static void upb_decoderplan_jit_assignfieldlabs(upb_fhandlers *f,
 static void upb_decoderplan_jit_assignmsglabs(upb_mhandlers *m,
                                              uint32_t *pclabel_count) {
  m->jit_startmsg_pclabel = (*pclabel_count)++;
+  m->jit_afterstartmsg_pclabel = (*pclabel_count)++;
  m->jit_endofbuf_pclabel = (*pclabel_count)++;
  m->jit_endofmsg_pclabel = (*pclabel_count)++;
  m->jit_dyndispatch_pclabel = (*pclabel_count)++;
  m->jit_unknownfield_pclabel = (*pclabel_count)++;
  m->max_field_number = 0;
  upb_inttable_iter i;
-  for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
-      i = upb_inttable_next(&m->fieldtab, i)) {
-    uint32_t key = upb_inttable_iter_key(i);
+  upb_inttable_begin(&i, &m->fieldtab);
+  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+    uint32_t key = upb_inttable_iter_key(&i);
    m->max_field_number = UPB_MAX(m->max_field_number, key);
-    upb_itofhandlers_ent *e = upb_inttable_iter_value(i);
-    upb_decoderplan_jit_assignfieldlabs(e->f, pclabel_count);
+    upb_fhandlers *f = upb_value_getptr(upb_inttable_iter_value(&i));
+    upb_decoderplan_jit_assignfieldlabs(f, pclabel_count);
  }
  // TODO: support large field numbers by either using a hash table or
  // generating code for a binary search.  For now large field numbers
@ -784,11 +746,12 @@ static void upb_decoderplan_makejit(upb_decoderplan *plan) {
  // Create dispatch tables.
  for (int i = 0; i < h->msgs_len; i++) {
    upb_mhandlers *m = h->msgs[i];
+    // We jump to after the startmsg handler since it is called before entering
+    // the JIT (either by upb_decoder or by a previous call to the JIT).
    m->jit_func =
-        plan->jit_code + dasm_getpclabel(plan, m->jit_startmsg_pclabel);
+        plan->jit_code + dasm_getpclabel(plan, m->jit_afterstartmsg_pclabel);
    for (uint32_t j = 0; j <= m->max_field_number; j++) {
-      upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, j);
-      upb_fhandlers *f = e ? e->f : NULL;
+      upb_fhandlers *f = upb_mhandlers_lookup(m, j);
      if (f) {
        m->tablearray[j] =
            plan->jit_code + dasm_getpclabel(plan, f->jit_pclabel);
--- a/upb/pb/glue.c
+++ b/upb/pb/glue.c
@ -1,84 +1,17 @@
 /*
 * upb - a minimalist implementation of protocol buffers.
 *
- * Copyright (c) 2010 Google Inc.  See LICENSE for details.
+ * Copyright (c) 2010-2012 Google Inc.  See LICENSE for details.
 * Author: Josh Haberman <jhaberman@gmail.com>
 */

 #include "upb/bytestream.h"
-#include "upb/descriptor.h"
-#include "upb/msg.h"
+#include "upb/descriptor/reader.h"
 #include "upb/pb/decoder.h"
 #include "upb/pb/glue.h"
-#include "upb/pb/textprinter.h"
-
-bool upb_strtomsg(const char *str, size_t len, void *msg, const upb_msgdef *md,
-                  bool allow_jit, upb_status *status) {
-  upb_stringsrc strsrc;
-  upb_stringsrc_init(&strsrc);
-  upb_stringsrc_reset(&strsrc, str, len);
-
-  upb_decoder d;
-  upb_handlers *h = upb_handlers_new();
-  upb_accessors_reghandlers(h, md);
-  upb_decoderplan *p = upb_decoderplan_new(h, allow_jit);
-  upb_decoder_init(&d);
-  upb_handlers_unref(h);
-  upb_decoder_resetplan(&d, p, 0);
-  upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), msg);
-  upb_success_t ret = upb_decoder_decode(&d);
-  // stringsrc and the handlers registered by upb_accessors_reghandlers()
-  // should not suspend.
-  assert((ret == UPB_OK) == upb_ok(upb_decoder_status(&d)));
-  if (status) upb_status_copy(status, upb_decoder_status(&d));
-
-  upb_stringsrc_uninit(&strsrc);
-  upb_decoder_uninit(&d);
-  upb_decoderplan_unref(p);
-  return ret == UPB_OK;
-}
-
-void *upb_filetonewmsg(const char *fname, const upb_msgdef *md, upb_status *s) {
-  void *msg = upb_stdmsg_new(md);
-  size_t len;
-  char *data = upb_readfile(fname, &len);
-  if (!data) goto err;
-  upb_strtomsg(data, len, msg, md, false, s);
-  if (!upb_ok(s)) goto err;
-  return msg;
-
-err:
-  upb_stdmsg_free(msg, md);
-  return NULL;
-}
-
-#if 0
-void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
-                   bool single_line) {
-  upb_stringsink strsink;
-  upb_stringsink_init(&strsink);
-  upb_stringsink_reset(&strsink, str);
-
-  upb_textprinter *p = upb_textprinter_new();
-  upb_handlers *h = upb_handlers_new();
-  upb_textprinter_reghandlers(h, md);
-  upb_textprinter_reset(p, upb_stringsink_bytesink(&strsink), single_line);
-
-  upb_status status = UPB_STATUS_INIT;
-  upb_msg_runhandlers(msg, md, h, p, &status);
-  // None of {upb_msg_runhandlers, upb_textprinter, upb_stringsink} should be
-  // capable of returning an error.
-  assert(upb_ok(&status));
-  upb_status_uninit(&status);
-
-  upb_stringsink_uninit(&strsink);
-  upb_textprinter_free(p);
-  upb_handlers_unref(h);
-}
-#endif

 upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
-                                        upb_status *status) {
+                                        void *owner, upb_status *status) {
  upb_stringsrc strsrc;
  upb_stringsrc_init(&strsrc);
  upb_stringsrc_reset(&strsrc, str, len);
@ -104,35 +37,20 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
    upb_descreader_uninit(&r);
    return NULL;
  }
-  upb_def **defs = upb_descreader_getdefs(&r, n);
+  upb_def **defs = upb_descreader_getdefs(&r, owner, n);
  upb_def **defscopy = malloc(sizeof(upb_def*) * (*n));
  memcpy(defscopy, defs, sizeof(upb_def*) * (*n));
  upb_descreader_uninit(&r);

-  // Set default accessors and layouts on all messages.
-  for(int i = 0; i < *n; i++) {
-    upb_def *def = defscopy[i];
-    upb_msgdef *md = upb_dyncast_msgdef(def);
-    if (!md) continue;
-    // For field in msgdef:
-    upb_msg_iter i;
-    for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) {
-      upb_fielddef *f = upb_msg_iter_field(i);
-      upb_fielddef_setaccessor(f, upb_stdmsg_accessor(f));
-    }
-    upb_msgdef_layout(md);
-  }
-
  return defscopy;
 }

 bool upb_load_descriptor_into_symtab(upb_symtab *s, const char *str, size_t len,
                                     upb_status *status) {
  int n;
-  upb_def **defs = upb_load_defs_from_descriptor(str, len, &n, status);
+  upb_def **defs = upb_load_defs_from_descriptor(str, len, &n, &defs, status);
  if (!defs) return false;
-  bool success = upb_symtab_add(s, defs, n, status);
-  for(int i = 0; i < n; i++) upb_def_unref(defs[i]);
+  bool success = upb_symtab_add(s, defs, n, &defs, status);
  free(defs);
  return success;
 }
--- a/upb/pb/glue.h
+++ b/upb/pb/glue.h
@ -1,7 +1,7 @@
 /*
 * upb - a minimalist implementation of protocol buffers.
 *
- * Copyright (c) 2011 Google Inc.  See LICENSE for details.
+ * Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
 * Author: Josh Haberman <jhaberman@gmail.com>
 *
 * upb's core components like upb_decoder and upb_msg are carefully designed to
@ -34,25 +34,12 @@
 extern "C" {
 #endif

-// Decodes the given string, which must be in protobuf binary format, to the
-// given upb_msg with msgdef "md", storing the status of the operation in "s".
-bool upb_strtomsg(const char *str, size_t len, void *msg,
-                  const upb_msgdef *md, bool allow_jit, upb_status *s);
-
-// Parses the given file into a new message of the given type.  Caller owns
-// the returned message (or NULL if an error occurred).
-void *upb_filetonewmsg(const char *fname, const upb_msgdef *md, upb_status *s);
-
-//void upb_msgtotext(struct _upb_string *str, void *msg,
-//                   struct _upb_msgdef *md, bool single_line);
-
-
 // Loads all defs from the given protobuf binary descriptor; "owner" is
 // forwarded to upb_descreader_getdefs().  The caller owns the returned
 // (malloc'd) array of defs, which will be of length *n.  On error NULL is
 // returned and status is set (if non-NULL).
 upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
-                                        upb_status *status);
+                                        void *owner, upb_status *status);

 // Like the previous but also adds the loaded defs to the given symtab.
 bool upb_load_descriptor_into_symtab(upb_symtab *symtab, const char *str,
--- a/upb/pb/textprinter.c
+++ b/upb/pb/textprinter.c
@ -96,7 +96,7 @@ err:
    const upb_fielddef *f = upb_value_getfielddef(fval);                     \
    uint64_t start_ofs = upb_bytesink_getoffset(p->sink);                    \
    CHECK(upb_textprinter_indent(p));                                        \
-    CHECK(upb_bytesink_writestr(p->sink, f->name));                          \
+    CHECK(upb_bytesink_writestr(p->sink, upb_fielddef_name(f)));             \
    CHECK(upb_bytesink_writestr(p->sink, ": "));                             \
    CHECK(upb_bytesink_printf(p->sink, fmt, upb_value_get ## member(val)));  \
    CHECK(upb_textprinter_endfield(p));                                      \
@ -124,7 +124,8 @@ static upb_flow_t upb_textprinter_putenum(void *_p, upb_value fval,
  upb_textprinter *p = _p;
  uint64_t start_ofs = upb_bytesink_getoffset(p->sink);
  const upb_fielddef *f = upb_value_getfielddef(fval);
-  upb_enumdef *enum_def = upb_downcast_enumdef(f->def);
+  const upb_enumdef *enum_def =
+      upb_downcast_enumdef_const(upb_fielddef_subdef(f));
  const char *label = upb_enumdef_iton(enum_def, upb_value_getint32(val));
  if (label) {
    CHECK(upb_bytesink_writestr(p->sink, label));
@ -157,7 +158,7 @@ static upb_sflow_t upb_textprinter_startsubmsg(void *_p, upb_value fval) {
  uint64_t start_ofs = upb_bytesink_getoffset(p->sink);
  const upb_fielddef *f = upb_value_getfielddef(fval);
  CHECK(upb_textprinter_indent(p));
-  CHECK(upb_bytesink_printf(p->sink, "%s {", f->name));
+  CHECK(upb_bytesink_printf(p->sink, "%s {", upb_fielddef_name(f)));
  if (!p->single_line)
    CHECK(upb_bytesink_putc(p->sink, '\n'));
  p->indent_depth++;
--- a/upb/pb/varint.h
+++ b/upb/pb/varint.h
@ -19,6 +19,16 @@
 extern "C" {
 #endif

+// A list of types as they are encoded on-the-wire.
+typedef enum {
+  UPB_WIRE_TYPE_VARINT      = 0,
+  UPB_WIRE_TYPE_64BIT       = 1,
+  UPB_WIRE_TYPE_DELIMITED   = 2,
+  UPB_WIRE_TYPE_START_GROUP = 3,
+  UPB_WIRE_TYPE_END_GROUP   = 4,
+  UPB_WIRE_TYPE_32BIT       = 5,
+} upb_wiretype_t;
+
 // The maximum number of bytes that it takes to encode a 64-bit varint.
 // Note that with a better encoding this could be 9 (TODO: write up a
 // wiki document about this).
--- a/upb/refcount.c
+++ b/upb/refcount.c
@ -0,0 +1,224 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2012 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include <stdlib.h>
+#include <limits.h>
+#include "upb/refcount.h"
+
+// TODO(haberman): require client to define these if ref debugging is on.
+#ifndef UPB_LOCK
+#define UPB_LOCK
+#endif
+
+#ifndef UPB_UNLOCK
+#define UPB_UNLOCK
+#endif
+
+/* arch-specific atomic primitives  *******************************************/
+
+// Each variant below provides the same two primitives over a uint32_t counter:
+//   upb_atomic_inc(a): increments *a.
+//   upb_atomic_dec(a): decrements *a and returns true iff it reached zero.
+
+#ifdef UPB_THREAD_UNSAFE  //////////////////////////////////////////////////////
+
+// Plain, non-atomic versions for builds that opt out of thread-safety.
+INLINE void upb_atomic_inc(uint32_t *a) { (*a)++; }
+INLINE bool upb_atomic_dec(uint32_t *a) { return --(*a) == 0; }
+
+#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4 ///////////////////
+
+// GCC >= 4.1: use the __sync builtins.
+INLINE void upb_atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); }
+INLINE bool upb_atomic_dec(uint32_t *a) {
+  return __sync_sub_and_fetch(a, 1) == 0;
+}
+
+#elif defined(WIN32) ///////////////////////////////////////////////////////////
+
+#include <Windows.h>
+
+// These take uint32_t* like the other variants, since callers pass
+// upb_refcount.count directly.  The Interlocked functions operate on a 32-bit
+// LONG, hence the casts.
+INLINE void upb_atomic_inc(uint32_t *a) {
+  InterlockedIncrement((volatile LONG*)a);
+}
+INLINE bool upb_atomic_dec(uint32_t *a) {
+  return InterlockedDecrement((volatile LONG*)a) == 0;
+}
+
+#else
+#error Atomic primitives not defined for your platform/CPU.  \
+       Implement them or compile with UPB_THREAD_UNSAFE.
+#endif
+
+// Reserved index values.
+#define UPB_INDEX_UNDEFINED UINT16_MAX
+#define UPB_INDEX_NOT_IN_STACK (UINT16_MAX - 1)
+
+// Folds the SCC that "from" belongs to into the SCC of "r": the two counters
+// are summed into r's, every member of from's ring is repointed at that shared
+// counter, and the two circular lists are spliced into one.  No-op when the
+// two already share a counter.
+static void upb_refcount_merge(upb_refcount *r, upb_refcount *from) {
+  if (!upb_refcount_merged(r, from)) {
+    uint32_t *shared = r->count;
+    *shared += *from->count;
+    free(from->count);  // from's ring is repointed by the loop just below.
+
+    // Walk from's ring once, redirecting each member to the shared counter.
+    upb_refcount *member = from;
+    do {
+      member->count = shared;
+      member = member->next;
+    } while (member != from);
+
+    // Exchanging the two "next" pointers joins the rings into a single one.
+    upb_refcount *r_succ = r->next;
+    r->next = from->next;
+    from->next = r_succ;
+  }
+}
+
+// Tarjan's algorithm, see:
+//   http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
+
+// Working state threaded through one run of the SCC search.
+typedef struct {
+  int index;  // Next index to hand out to a newly-visited object.
+  upb_refcount **stack;  // Objects visited but not yet popped into an SCC.
+  int stack_len;  // Number of entries currently on "stack".
+  upb_getsuccessors *func;  // Callback invoked to visit an object's successors.
+} upb_tarjan_state;
+
+static void upb_refcount_dofindscc(upb_refcount *obj, upb_tarjan_state *state);
+
+// Called by a upb_getsuccessors callback for each edge obj -> subobj.
+// "_state" is the upb_tarjan_state that was handed to the callback.
+void upb_refcount_visit(upb_refcount *obj, upb_refcount *subobj, void *_state) {
+  // Already popped off the stack: nothing to propagate.
+  if (subobj->index == UPB_INDEX_NOT_IN_STACK) return;
+
+  if (subobj->index == UPB_INDEX_UNDEFINED) {
+    // First time we see this object; recurse, then inherit its lowlink.
+    upb_refcount_dofindscc(subobj, (upb_tarjan_state*)_state);
+    obj->lowlink = UPB_MIN(obj->lowlink, subobj->lowlink);
+  } else {
+    // Still on the stack, hence part of the SCC currently being built.
+    obj->lowlink = UPB_MIN(obj->lowlink, subobj->index);
+  }
+}
+
+// Recursive step of Tarjan's algorithm: assigns "obj" the next index, pushes
+// it on the stack, visits its successors via state->func, and -- if "obj"
+// turns out to be the root of an SCC -- pops that SCC off the stack, merging
+// every member's refcount into obj's.
+static void upb_refcount_dofindscc(upb_refcount *obj, upb_tarjan_state *state) {
+  // Indices are stored in 16 bits and the top two values are reserved, so
+  // running past them would alias the sentinels (and, eventually, overrun the
+  // UINT16_MAX-entry stack allocated by upb_refcount_findscc()).
+  assert(state->index < UPB_INDEX_NOT_IN_STACK);
+  obj->index = state->index;
+  obj->lowlink = state->index;
+  state->index++;
+  state->stack[state->stack_len++] = obj;
+
+  state->func(obj, state);  // Visit successors.
+
+  if (obj->lowlink == obj->index) {
+    // "obj" is the root of an SCC: everything above it on the stack belongs
+    // to the same component.
+    upb_refcount *scc_obj;
+    while ((scc_obj = state->stack[--state->stack_len]) != obj) {
+      upb_refcount_merge(obj, scc_obj);
+      scc_obj->index = UPB_INDEX_NOT_IN_STACK;
+    }
+    obj->index = UPB_INDEX_NOT_IN_STACK;
+  }
+}
+
+// Merges the refcounts of every strongly-connected component found starting
+// from refs[0..n-1]; "func" is invoked to enumerate an object's successors.
+// Returns false if the work stack could not be allocated.
+bool upb_refcount_findscc(upb_refcount **refs, int n, upb_getsuccessors *func) {
+  upb_tarjan_state state;
+  state.index = 0;
+  state.stack_len = 0;
+  state.func = func;
+  // TODO(haberman): allocate less memory.  We can't use n as a bound because
+  // it doesn't include fielddefs.  Could either use a dynamically-resizing
+  // array or think of some other way.
+  state.stack = malloc(UINT16_MAX * sizeof(void*));
+  if (!state.stack) return false;
+
+  for (int idx = 0; idx < n; idx++) {
+    upb_refcount *root = refs[idx];
+    // Objects already reached from an earlier root need no second pass.
+    if (root->index != UPB_INDEX_UNDEFINED) continue;
+    upb_refcount_dofindscc(root, &state);
+  }
+
+  free(state.stack);
+  return true;
+}
+
+
+/* upb_refcount  **************************************************************/
+
+// Initializes "r" with a single ref held by "owner".  Returns false if the
+// counter could not be allocated, in which case "r" is not fully initialized.
+bool upb_refcount_init(upb_refcount *r, void *owner) {
+  (void)owner;  // Only consulted when UPB_DEBUG_REFS is on.
+  r->count = malloc(sizeof(uint32_t));
+  if (!r->count) return false;
+  // Initializing this here means upb_refcount_findscc() can only run once for
+  // each refcount; may need to revise this to be more flexible.
+  r->index = UPB_INDEX_UNDEFINED;
+  r->next = r;  // A fresh refcount is a ring of one.
+#ifdef UPB_DEBUG_REFS
+  // We don't detect malloc() failures for UPB_DEBUG_REFS.
+  upb_inttable_init(&r->refs);
+  // Start at zero and take the first ref through the normal path so that the
+  // owner gets recorded in r->refs.
+  *r->count = 0;
+  upb_refcount_ref(r, owner);
+#else
+  *r->count = 1;
+#endif
+  return true;
+}
+
+// Tears down "r".  With UPB_DEBUG_REFS this checks that no owner is still
+// recorded and releases the tracking table; otherwise there is nothing to do.
+void upb_refcount_uninit(upb_refcount *r) {
+#ifdef UPB_DEBUG_REFS
+  assert(upb_inttable_count(&r->refs) == 0);
+  upb_inttable_uninit(&r->refs);
+#else
+  (void)r;
+#endif
+}
+
+// Hands the ref currently held by "from" over to "to"; the overall count is
+// unchanged.  Only the UPB_DEBUG_REFS bookkeeping has any work to do.
+void upb_refcount_donateref(upb_refcount *r, void *from, void *to) {
+  assert(from != to);
+#ifdef UPB_DEBUG_REFS
+  // Take the new ref before dropping the old one.
+  upb_refcount_ref(r, to);
+  upb_refcount_unref(r, from);
+#else
+  (void)r; (void)from; (void)to;
+#endif
+}
+
+// Thread-safe operations //////////////////////////////////////////////////////
+
+// Ref and unref are thread-safe.
+//
+// Takes a new ref on "r" on behalf of "owner" by incrementing the counter.
+// With UPB_DEBUG_REFS, "owner" is additionally recorded in r->refs (and must
+// not already be present there).
+void upb_refcount_ref(upb_refcount *r, void *owner) {
+  (void)owner;
+  upb_atomic_inc(r->count);
+#ifdef UPB_DEBUG_REFS
+  UPB_LOCK;
+  // Caller must not already own a ref.
+  assert(upb_inttable_lookup(&r->refs, (uintptr_t)owner) == NULL);
+
+  // If a ref is leaked we want to blame the leak on whoever leaked the ref,
+  // not on who originally allocated the refcounted object.  We accomplish
+  // this as follows.  When a ref is taken in DEBUG_REFS mode, we malloc() some
+  // memory and set up pointers like so:
+  //
+  //   upb_refcount
+  //   +----------+  +---------+
+  //   | count    |<-+         |
+  //   +----------+       +----------+
+  //   | table    |---X-->| malloc'd |
+  //   +----------+       | memory   |
+  //                      +----------+
+  //
+  // Since the "malloc'd memory" is allocated inside of "ref" and free'd in
+  // unref, it will cause a leak if not unref'd.  And since the leaked memory
+  // points to the object itself, the object will be considered "indirectly
+  // lost" by tools like Valgrind and not shown unless requested (which is good
+  // because the object's creator may not be responsible for the leak).  But we
+  // have to hide the pointer marked "X" above from Valgrind, otherwise the
+  // malloc'd memory will appear to be indirectly leaked and the object itself
+  // will still be considered the primary leak.  We hide this pointer from
+  // Valgrind (et al.) by doing a bitwise not on it.
+  //
+  // Note that the malloc() result below is not checked before it is written
+  // through.
+  upb_refcount **target = malloc(sizeof(void*));
+  uintptr_t obfuscated = ~(uintptr_t)target;
+  *target = r;
+  upb_inttable_insert(&r->refs, (uintptr_t)owner, upb_value_uint64(obfuscated));
+  UPB_UNLOCK;
+#endif
+}
+
+// Releases the ref held by "owner".  Returns true if that was the last ref, in
+// which case the counter itself has been freed.
+bool upb_refcount_unref(upb_refcount *r, void *owner) {
+  (void)owner;
+  bool ret = upb_atomic_dec(r->count);
+#ifdef UPB_DEBUG_REFS
+  UPB_LOCK;
+  upb_value v;
+  bool success = upb_inttable_remove(&r->refs, (uintptr_t)owner, &v);
+  assert(success);
+  if (success) {
+    // Must un-obfuscate the pointer (see upb_refcount_ref()).  Go through
+    // uintptr_t so the 64-bit value is narrowed to pointer width explicitly
+    // instead of being cast straight to a pointer of a different size.
+    free((void*)(uintptr_t)~upb_value_getuint64(v));
+  }
+  UPB_UNLOCK;
+#endif
+  if (ret) free(r->count);
+  return ret;
+}
+
+// Two refcounts are merged exactly when they point at the same counter.
+bool upb_refcount_merged(const upb_refcount *r, const upb_refcount *r2) {
+  const uint32_t *lhs = r->count;
+  const uint32_t *rhs = r2->count;
+  return lhs == rhs;
+}
--- a/upb/refcount.h
+++ b/upb/refcount.h
@ -0,0 +1,70 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * A thread-safe refcount that can optionally track references for debugging
+ * purposes.  It helps avoid circular references by allowing a
+ * strongly-connected component in the graph to share a refcount.
+ *
+ * This interface is internal to upb.
+ */
+
+#ifndef UPB_REFCOUNT_H_
+#define UPB_REFCOUNT_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "upb/table.h"
+
+#ifndef NDEBUG
+#define UPB_DEBUG_REFS
+#endif
+
+// A refcount.  Refcounts that have been merged (see upb_refcount_findscc())
+// point at one shared counter and are chained together through "next".
+typedef struct _upb_refcount {
+  uint32_t *count;  // malloc'd counter; shared between merged refcounts.
+  struct _upb_refcount *next;  // Circularly-linked list of this SCC.
+  uint16_t index;    // For SCC algorithm.
+  uint16_t lowlink;  // For SCC algorithm.
+#ifdef UPB_DEBUG_REFS
+  upb_inttable refs;  // Debug only: current owners, keyed by owner pointer.
+#endif
+} upb_refcount;
+
+// NON THREAD SAFE operations //////////////////////////////////////////////////
+
+// Initializes the refcount with a single ref for the given owner.  Returns
+// false if memory could not be allocated.
+bool upb_refcount_init(upb_refcount *r, void *owner);
+
+// Uninitializes the refcount.  May only be called after unref() returns true.
+void upb_refcount_uninit(upb_refcount *r);
+
+// Moves an existing ref from owner "from" to owner "to", without changing the
+// overall ref count.
+void upb_refcount_donateref(upb_refcount *r, void *from, void *to);
+
+// Finds strongly-connected components among some set of objects and merges all
+// refcounts that share a SCC.  The given function will be called when the
+// algorithm needs to visit children of a particular object; the function
+// should call upb_refcount_visit() once for each child obj.
+//
+// Returns false if memory allocation failed.
+typedef void upb_getsuccessors(upb_refcount *obj, void*);
+bool upb_refcount_findscc(upb_refcount **objs, int n, upb_getsuccessors *func);
+void upb_refcount_visit(upb_refcount *obj, upb_refcount *subobj, void *closure);
+
+// Thread-safe operations //////////////////////////////////////////////////////
+
+// Increases the ref count, the new ref is owned by "owner" which must not
+// already own a ref.  Circular reference chains are not allowed.
+void upb_refcount_ref(upb_refcount *r, void *owner);
+
+// Release a ref owned by owner, returns true if that was the last ref.
+bool upb_refcount_unref(upb_refcount *r, void *owner);
+
+// Returns true if these two objects share a refcount.
+bool upb_refcount_merged(const upb_refcount *r, const upb_refcount *r2);
+
+#endif  // UPB_REFCOUNT_H_
--- a/upb/table.c
+++ b/upb/table.c
@ -4,8 +4,10 @@
 * Copyright (c) 2009 Google Inc.  See LICENSE for details.
 * Author: Josh Haberman <jhaberman@gmail.com>
 *
- * There are a few printf's strewn throughout this file, uncommenting them
- * can be useful for debugging.
+ * Implementation is heavily inspired by Lua's ltable.c.
+ *
+ * TODO: for table iteration we use (array - 1) in several places; is this
+ * undefined behavior?  If so find a better solution.
 */

 #include "upb/table.h"
@ -14,6 +16,8 @@
 #include <stdlib.h>
 #include <string.h>

+#define UPB_MAXARRSIZE 16  // 64k.
+
 static const double MAX_LOAD = 0.85;

 // The minimum percentage of an array part that we will allow.  This is a
@ -21,385 +25,319 @@ static const double MAX_LOAD = 0.85;
 // cache effects).  The lower this is, the more memory we'll use.
 static const double MIN_DENSITY = 0.1;

+// Returns floor(log2(v)), clamped to at most UPB_MAXARRSIZE.  Returns 0 for
+// v == 0 (log2 is undefined there; 0 is what the portable loop below yields
+// and keeps the result usable as an array index).
+int upb_log2(uint64_t v) {
+  int ret = 0;
+#ifdef __GNUC__
+  // __builtin_clzll() takes the full 64-bit operand (plain __builtin_clz()
+  // takes an unsigned int and would silently truncate v).  Both builtins are
+  // undefined for 0, so only call it for nonzero input.
+  if (v != 0) ret = 63 - __builtin_clzll(v);
+#else
+  while (v >>= 1) ret++;
+#endif
+  return UPB_MIN(UPB_MAXARRSIZE, ret);
+}
+
+static upb_tabkey upb_strkey(const char *str) {
+  upb_tabkey k;
+  k.str = (char*)str;
+  return k;
+}
+
 static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed);
+typedef upb_tabent *upb_hashfunc_t(const upb_table *t, upb_tabkey key);
+typedef bool upb_eqlfunc_t(upb_tabkey k1, upb_tabkey k2);

 /* Base table (shared code) ***************************************************/

-static uint32_t upb_table_size(const upb_table *t) { return 1 << t->size_lg2; }
-static size_t upb_table_entrysize(const upb_table *t) { return t->entry_size; }
-static size_t upb_table_valuesize(const upb_table *t) { return t->value_size; }
+// Number of slots in the hash part: 2^size_lg2.
+static size_t upb_table_size(const upb_table *t) { return 1 << t->size_lg2; }
+
+// Returns true if adding one more entry would push the hash part's load
+// factor (entries / slots) above MAX_LOAD.
+static bool upb_table_isfull(upb_table *t) {
+  return (double)(t->count + 1) / upb_table_size(t) > MAX_LOAD;
+}

-void upb_table_init(upb_table *t, uint32_t size, uint16_t entry_size) {
+static bool upb_table_init(upb_table *t, uint8_t size_lg2) {
  t->count = 0;
-  t->entry_size = entry_size;
-  t->size_lg2 = 1;
-  while(upb_table_size(t) < size) t->size_lg2++;
-  size_t bytes = upb_table_size(t) * t->entry_size;
+  t->size_lg2 = size_lg2;
+  size_t bytes = upb_table_size(t) * sizeof(upb_tabent);
  t->mask = upb_table_size(t) - 1;
  t->entries = malloc(bytes);
+  if (!t->entries) return false;
+  memset(t->entries, 0, bytes);
+  return true;
 }

-void upb_table_free(upb_table *t) { free(t->entries); }
+static void upb_table_uninit(upb_table *t) { free(t->entries); }

-/* upb_inttable ***************************************************************/
+static bool upb_tabent_isempty(const upb_tabent *e) { return e->key.num == 0; }

-static upb_inttable_entry *intent(const upb_inttable *t, int32_t i) {
-  //printf("looking up int entry %d, size of entry: %d\n", i, t->t.entry_size);
-  return UPB_INDEX(t->t.entries, i, t->t.entry_size);
+// Returns an unused slot, scanning backwards from the last slot of the table.
+// The assert fires if the scan reaches the first slot without finding one.
+static upb_tabent *upb_table_emptyent(const upb_table *t) {
+  upb_tabent *e = t->entries + upb_table_size(t);
+  while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); }
 }

-static uint32_t upb_inttable_hashtablesize(const upb_inttable *t) {
-  return upb_table_size(&t->t);
+// Looks up "key", returning a pointer to its stored value or NULL if absent.
+// "hash" maps a key to its main-position slot; "eql" compares two keys.
+static upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key,
+                                   upb_hashfunc_t *hash, upb_eqlfunc_t *eql) {
+  upb_tabent *e = hash(t, key);
+  if (upb_tabent_isempty(e)) return NULL;
+  while (1) {  // Walk the collision chain rooted at the main position.
+    if (eql(e->key, key)) return &e->val;
+    if ((e = e->next) == NULL) return NULL;
+  }
 }

-void upb_inttable_sizedinit(upb_inttable *t, uint32_t arrsize, uint32_t hashsize,
-                            uint16_t value_size) {
-  size_t entsize = _upb_inttable_entrysize(value_size);
-  upb_table_init(&t->t, hashsize, entsize);
-  for (uint32_t i = 0; i < upb_table_size(&t->t); i++) {
-    upb_inttable_entry *e = intent(t, i);
-    e->hdr.key = 0;
-    e->hdr.next = UPB_END_OF_CHAIN;
-    e->val.has_entry = 0;
+// Inserts key -> val.  The given key must not already exist in the table.
+// "hash" maps a key to its main-position slot; "eql" compares two keys (it is
+// only used by the asserts here).  On a collision a free slot is taken from
+// upb_table_emptyent().
+static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val,
+                             upb_hashfunc_t *hash, upb_eqlfunc_t *eql) {
+  assert(upb_table_lookup(t, key, hash, eql) == NULL);
+  t->count++;
+  upb_tabent *mainpos_e = hash(t, key);
+  upb_tabent *our_e = mainpos_e;
+  if (upb_tabent_isempty(mainpos_e)) {
+    // Main position is free; we become the head of a new one-element chain.
+    // A slot is marked empty through its key only, so a previously used slot
+    // may still carry an old "next" link; reset it.
+    our_e->next = NULL;
+  } else {  // Collision.
+    upb_tabent *new_e = upb_table_emptyent(t);
+    upb_tabent *chain = hash(t, mainpos_e->key);  // Head of collider's chain.
+    if (chain == mainpos_e) {
+      // Existing ent is in its main position (it has the same hash as us, and
+      // is the head of our chain).  Insert to new ent and append to this chain.
+      new_e->next = mainpos_e->next;
+      mainpos_e->next = new_e;
+      our_e = new_e;
+    } else {
+      // Existing ent is not in its main position (it is a node in some other
+      // chain).  This implies that no existing ent in the table has our hash.
+      // Evict it (updating its chain) and use its ent for head of our chain.
+      *new_e = *mainpos_e;  // copies next.
+      while (chain->next != mainpos_e) chain = chain->next;
+      chain->next = new_e;
+      our_e = mainpos_e;
+      our_e->next = NULL;
+    }
   }
-  t->t.value_size = value_size;
-  // Always make the array part at least 1 long, so that we know key 0
-  // won't be in the hash part (which lets us speed up that code path).
-  t->array_size = UPB_MAX(1, arrsize);
-  t->array = malloc(upb_table_valuesize(&t->t) * t->array_size);
-  t->array_count = 0;
-  for (uint32_t i = 0; i < t->array_size; i++) {
-    upb_inttable_value *val = UPB_INDEX(t->array, i, upb_table_valuesize(&t->t));
-    val->has_entry = false;
+  our_e->key = key;
+  our_e->val = val;
+  assert(upb_table_lookup(t, key, hash, eql) == &our_e->val);
+}
+
+// Removes "key" from the table.  Returns true if it was present, in which case
+// its value is copied to *val (when val is non-NULL); returns false otherwise.
+// "hash" maps a key to its main-position slot; "eql" compares two keys.
+static bool upb_table_remove(upb_table *t, upb_tabkey key, upb_value *val,
+                             upb_hashfunc_t *hash, upb_eqlfunc_t *eql) {
+  upb_tabent *chain = hash(t, key);
+  // A chain's head always lives in its main position, so an empty main
+  // position means the key is absent.  Bail out before handing the empty
+  // slot's key to eql().
+  if (upb_tabent_isempty(chain)) return false;
+  if (eql(chain->key, key)) {
+    t->count--;
+    if (val) *val = chain->val;
+    if (chain->next) {
+      // Pull the second entry into the head slot and vacate its old slot.
+      upb_tabent *move = chain->next;
+      *chain = *move;
+      move->key.num = 0;  // Make the slot empty.
+      move->next = NULL;  // Don't leave a stale link in the empty slot.
+    } else {
+      chain->key.num = 0;  // Make the slot empty.
+    }
+    return true;
+  } else {
+    while (chain->next && !eql(chain->next->key, key))
+      chain = chain->next;
+    if (chain->next) {
+      // Found element to remove.
+      upb_tabent *rm = chain->next;
+      if (val) *val = rm->val;
+      chain->next = rm->next;  // Unlink it from the chain.
+      rm->key.num = 0;         // Make the slot empty.
+      rm->next = NULL;         // Don't leave a stale link in the empty slot.
+      t->count--;
+      return true;
+    } else {
+      return false;
+    }
   }
 }

-void upb_inttable_init(upb_inttable *t, uint32_t hashsize, uint16_t value_size) {
-  upb_inttable_sizedinit(t, 0, hashsize, value_size);
+// Returns the first non-empty entry located after "e", or NULL if there is
+// none before the end of the entry array.
+static upb_tabent *upb_table_next(const upb_table *t, upb_tabent *e) {
+  upb_tabent *end = t->entries + upb_table_size(t);
+  do { if (++e == end) return NULL; } while(e->key.num == 0);
+  return e;
 }

-void upb_inttable_free(upb_inttable *t) {
-  upb_table_free(&t->t);
-  free(t->array);
+// Returns the first non-empty entry, or NULL if the table has none.  Starts
+// one slot before the array so that upb_table_next()'s pre-increment lands on
+// slot 0.
+static upb_tabent *upb_table_begin(const upb_table *t) {
+  return upb_table_next(t, t->entries - 1);
 }

-static uint32_t empty_intbucket(upb_inttable *table)
-{
-  // TODO: does it matter that this is biased towards the front of the table?
-  for(uint32_t i = 0; i < upb_inttable_hashtablesize(table); i++) {
-    upb_inttable_entry *e = intent(table, i);
-    if(!e->val.has_entry) return i;
-  }
-  assert(false);
-  return 0;
+
+/* upb_strtable ***************************************************************/
+
+// A simple "subclass" of upb_table that only adds a hash function for strings.
+
+// Maps a NUL-terminated string key to its main-position slot in "t".
+static upb_tabent *upb_strhash(const upb_table *t, upb_tabkey key) {
+  // Could avoid the strlen() by using a hash function that terminates on NULL.
+  return t->entries + (MurmurHash2(key.str, strlen(key.str), 0) & t->mask);
 }

-// The insert routines have a lot more code duplication between int/string
-// variants than I would like, but there's just a bit too much that varies to
-// parameterize them.
-static void intinsert(upb_inttable *t, uint32_t key, const void *val) {
-  assert(upb_inttable_lookup(t, key) == NULL);
-  upb_inttable_value *table_val;
-  if (_upb_inttable_isarrkey(t, key)) {
-    table_val = UPB_INDEX(t->array, key, upb_table_valuesize(&t->t));
-    t->array_count++;
-    //printf("Inserting key %d to Array part! %p\n", key, table_val);
-  } else {
-    t->t.count++;
-    uint32_t bucket = _upb_inttable_bucket(t, key);
-    upb_inttable_entry *table_e = intent(t, bucket);
-    //printf("Hash part!  Inserting into bucket %d?\n", bucket);
-    if(table_e->val.has_entry) {  /* Collision. */
-      //printf("Collision!\n");
-      if(bucket == _upb_inttable_bucket(t, table_e->hdr.key)) {
-        /* Existing element is in its main posisiton.  Find an empty slot to
-         * place our new element and append it to this key's chain. */
-        uint32_t empty_bucket = empty_intbucket(t);
-        while (table_e->hdr.next != UPB_END_OF_CHAIN)
-          table_e = intent(t, table_e->hdr.next);
-        table_e->hdr.next = empty_bucket;
-        table_e = intent(t, empty_bucket);
-      } else {
-        /* Existing element is not in its main position.  Move it to an empty
-         * slot and put our element in its main position. */
-        uint32_t empty_bucket = empty_intbucket(t);
-        uint32_t evictee_bucket = _upb_inttable_bucket(t, table_e->hdr.key);
-        memcpy(intent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
-        upb_inttable_entry *evictee_e = intent(t, evictee_bucket);
-        while(1) {
-          assert(evictee_e->val.has_entry);
-          assert(evictee_e->hdr.next != UPB_END_OF_CHAIN);
-          if(evictee_e->hdr.next == bucket) {
-            evictee_e->hdr.next = empty_bucket;
-            break;
-          }
-          evictee_e = intent(t, evictee_e->hdr.next);
-        }
-        /* table_e remains set to our mainpos. */
-      }
-    }
-    //printf("Inserting!  to:%p, copying to: %p\n", table_e, &table_e->val);
-    table_val = &table_e->val;
-    table_e->hdr.key = key;
-    table_e->hdr.next = UPB_END_OF_CHAIN;
-  }
-  memcpy(table_val, val, upb_table_valuesize(&t->t));
-  table_val->has_entry = true;
-  assert(upb_inttable_lookup(t, key) == table_val);
+static bool upb_streql(upb_tabkey k1, upb_tabkey k2) {
+  return strcmp(k1.str, k2.str) == 0;
 }

-// Insert all elements from src into dest.  Caller ensures that a resize will
-// not be necessary.
-static void upb_inttable_insertall(upb_inttable *dst, upb_inttable *src) {
-  for(upb_inttable_iter i = upb_inttable_begin(src); !upb_inttable_done(i);
-      i = upb_inttable_next(src, i)) {
-    //printf("load check: %d %d\n", upb_table_count(&dst->t), upb_inttable_hashtablesize(dst));
-    assert((double)(upb_table_count(&dst->t)) /
-                    upb_inttable_hashtablesize(dst) <= MAX_LOAD);
-    intinsert(dst, upb_inttable_iter_key(i), upb_inttable_iter_value(i));
-  }
+bool upb_strtable_init(upb_strtable *t) { return upb_table_init(&t->t, 4); }
+
+// Frees the key string of every slot, then the entry array itself.
+void upb_strtable_uninit(upb_strtable *t) {
+  // Visits all slots, including empty ones.  NOTE(review): this relies on an
+  // empty slot (key.num == 0) reading back as a NULL key.str, so that free()
+  // is a no-op for it -- confirm for the target platforms.
+  for (size_t i = 0; i < upb_table_size(&t->t); i++)
+    free(t->t.entries[i].key.str);
+  upb_table_uninit(&t->t);
 }

-void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val) {
-  if((double)(t->t.count + 1) / upb_inttable_hashtablesize(t) > MAX_LOAD) {
-    //printf("RESIZE!\n");
-    // Need to resize.  Allocate new table with double the size of however many
-    // elements we have now, add old elements to it.  We create the new hash
-    // table without an array part, even if the old table had an array part.
-    // If/when the user calls upb_inttable_compact() again, we'll create an
-    // array part then.
-    upb_inttable new_table;
-    //printf("Old table count=%d, size=%d\n", upb_inttable_count(t), upb_inttable_hashtablesize(t));
-    upb_inttable_init(&new_table, upb_inttable_count(t)*2, upb_table_valuesize(&t->t));
-    upb_inttable_insertall(&new_table, t);
-    upb_inttable_free(t);
+// Inserts a private copy of the NUL-terminated key "k" with value "v".  The
+// key must not already be present.  Returns false if memory allocation fails;
+// in that case no existing entry is lost.
+bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) {
+  if (upb_table_isfull(&t->t)) {
+    // Need to resize.  New table of double the size, move old entries to it.
+    // The entries are moved together with their already-owned key strings
+    // instead of being re-inserted through upb_strtable_insert(): that would
+    // strdup() every key again, and a failed strdup() would drop the entry.
+    upb_strtable new_table;
+    if (!upb_table_init(&new_table.t, t->t.size_lg2 + 1)) return false;
+    upb_tabent *e;
+    for (e = upb_table_begin(&t->t); e; e = upb_table_next(&t->t, e))
+      upb_table_insert(&new_table.t, e->key, e->val, &upb_strhash, &upb_streql);
+    // Release only the old entry array; the keys now belong to new_table.
+    upb_table_uninit(&t->t);
     *t = new_table;
   }
-  intinsert(t, key, val);
+  if ((k = strdup(k)) == NULL) return false;
+  upb_table_insert(&t->t, upb_strkey(k), v, &upb_strhash, &upb_streql);
+  return true;
 }

-void upb_inttable_compact(upb_inttable *t) {
-  // Find the largest array part we can that satisfies the MIN_DENSITY
-  // definition.  For now we just count down powers of two.
-  uint32_t largest_key = 0;
-  for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
-      i = upb_inttable_next(t, i)) {
-    largest_key = UPB_MAX(largest_key, upb_inttable_iter_key(i));
-  }
-  int lg2_array = 0;
-  while ((1UL << lg2_array) < largest_key) ++lg2_array;
-  ++lg2_array;  // Undo the first iteration.
-  size_t array_size = 0;
-  int array_count = 0;
-  while (lg2_array > 0) {
-    array_size = (1 << --lg2_array);
-    //printf("Considering size %d (btw, our table has %d things total)\n", array_size, upb_inttable_count(t));
-    if ((double)upb_inttable_count(t) / array_size < MIN_DENSITY) {
-      // Even if 100% of the keys were in the array pary, an array of this
-      // size would not be dense enough.
-      continue;
-    }
-    array_count = 0;
-    for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
-        i = upb_inttable_next(t, i)) {
-      if (upb_inttable_iter_key(i) < array_size)
-        array_count++;
-    }
-    //printf("There would be %d things in that array\n", array_count);
-    if ((double)array_count / array_size >= MIN_DENSITY) break;
-  }
-  upb_inttable new_table;
-  int hash_size = (upb_inttable_count(t) - array_count + 1) / MAX_LOAD;
-  //printf("array_count: %d, array_size: %d, hash_size: %d, table size: %d\n", array_count, array_size, hash_size, upb_inttable_count(t));
-  upb_inttable_sizedinit(&new_table, array_size, hash_size,
-                         upb_table_valuesize(&t->t));
-  //printf("For %d things, using array size=%d, hash_size = %d\n", upb_inttable_count(t), array_size, hash_size);
-  upb_inttable_insertall(&new_table, t);
-  upb_inttable_free(t);
-  *t = new_table;
+upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key) {
+  return upb_table_lookup(&t->t, upb_strkey(key), &upb_strhash, &upb_streql);
 }

-upb_inttable_iter upb_inttable_begin(const upb_inttable *t) {
-  upb_inttable_iter iter = {-1, NULL, true};  // -1 will overflow to 0 on the first iteration.
-  return upb_inttable_next(t, iter);
+void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
+  i->t = t;
+  i->e = upb_table_begin(&t->t);
 }

-upb_inttable_iter upb_inttable_next(const upb_inttable *t,
-                                    upb_inttable_iter iter) {
-  const size_t hdrsize = sizeof(upb_inttable_header);
-  const size_t entsize = upb_table_entrysize(&t->t);
-  if (iter.array_part) {
-    while (++iter.key < t->array_size) {
-      //printf("considering value %d\n", iter.key);
-      iter.value = UPB_INDEX(t->array, iter.key, t->t.value_size);
-      if (iter.value->has_entry) return iter;
-    }
-    //printf("Done with array part!\n");
-    iter.array_part = false;
-    // Point to the value of the table[-1] entry.
-    iter.value = UPB_INDEX(intent(t, -1), 1, hdrsize);
-  }
-  void *end = intent(t, upb_inttable_hashtablesize(t));
-  // Point to the entry for the value that was previously in iter.
-  upb_inttable_entry *e = UPB_INDEX(iter.value, -1, hdrsize);
-  do {
-    e = UPB_INDEX(e, 1, entsize);
-    //printf("considering value %p (val: %p)\n", e, &e->val);
-    if(e == end) {
-      //printf("No values.\n");
-      iter.value = NULL;
-      return iter;
-    }
-  } while(!e->val.has_entry);
-  //printf("USING VALUE! %p\n", e);
-  iter.key = e->hdr.key;
-  iter.value = &e->val;
-  return iter;
+void upb_strtable_next(upb_strtable_iter *i) {
+  i->e = upb_table_next(&i->t->t, i->e);
 }


-/* upb_strtable ***************************************************************/
+/* upb_inttable ***************************************************************/

-static upb_strtable_entry *strent(const upb_strtable *t, int32_t i) {
-  //fprintf(stderr, "i: %d, table_size: %d\n", i, upb_table_size(&t->t));
-  assert(i <= (int32_t)upb_table_size(&t->t));
-  return UPB_INDEX(t->t.entries, i, t->t.entry_size);
-}
+// For inttables we use a hybrid structure where small keys are kept in an
+// array and large keys are put in the hash table.

-static uint32_t upb_strtable_size(const upb_strtable *t) {
-  return upb_table_size(&t->t);
+static bool upb_inteql(upb_tabkey k1, upb_tabkey k2) {
+  return k1.num == k2.num;
 }

-void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t valuesize) {
-  t->t.value_size = valuesize;
-  size_t entsize = upb_align_up(sizeof(upb_strtable_header) + valuesize, 8);
-  upb_table_init(&t->t, size, entsize);
-  for (uint32_t i = 0; i < upb_table_size(&t->t); i++) {
-    upb_strtable_entry *e = strent(t, i);
-    e->hdr.key = NULL;
-    e->hdr.next = UPB_END_OF_CHAIN;
-  }
+size_t upb_inttable_count(const upb_inttable *t) {
+  return t->t.count + t->array_count;
 }

-void upb_strtable_free(upb_strtable *t) {
-  // Free keys from the strtable.
-  upb_strtable_iter i;
-  for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i))
-    free((char*)upb_strtable_iter_key(&i));
-  upb_table_free(&t->t);
+// Initializes "t" with an array part of "asize" elements (at least 1) and a
+// hash part of 2^hsize_lg2 slots.  Returns false if either allocation fails,
+// in which case nothing is left allocated.
+bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2) {
+  if (!upb_table_init(&t->t, hsize_lg2)) return false;
+  // Always make the array part at least 1 long, so that we know key 0
+  // won't be in the hash part, which simplifies things.
+  t->array_size = UPB_MAX(1, asize);
+  t->array_count = 0;
+  size_t array_bytes = t->array_size * sizeof(upb_value);
+  t->array = malloc(array_bytes);
+  if (!t->array) {
+    upb_table_uninit(&t->t);
+    return false;
+  }
+  // Fill every array slot with all-one bits.  NOTE(review): presumably this is
+  // the "no entry" pattern tested by upb_arrhas() -- confirm in table.h.
+  memset(t->array, 0xff, array_bytes);
+  return true;
 }

-static uint32_t strtable_bucket(const upb_strtable *t, const char *key) {
-  uint32_t hash = MurmurHash2(key, strlen(key), 0);
-  return (hash & t->t.mask);
+bool upb_inttable_init(upb_inttable *t) {
+  return upb_inttable_sizedinit(t, 0, 4);
 }

-void *upb_strtable_lookup(const upb_strtable *t, const char *key) {
-  uint32_t bucket = strtable_bucket(t, key);
-  upb_strtable_entry *e;
-  do {
-    e = strent(t, bucket);
-    if(e->hdr.key && strcmp(e->hdr.key, key) == 0) return &e->val;
-  } while((bucket = e->hdr.next) != UPB_END_OF_CHAIN);
-  return NULL;
+void upb_inttable_uninit(upb_inttable *t) {
+  upb_table_uninit(&t->t);
+  free(t->array);
 }

-void *upb_strtable_lookupl(const upb_strtable *t, const char *key, size_t len) {
-  // TODO: improve.
-  char *key2 = malloc(len+1);
-  memcpy(key2, key, len);
-  key2[len] = '\0';
-  void *ret = upb_strtable_lookup(t, key2);
-  free(key2);
-  return ret;
+// Inserts key -> val.  Keys below array_size are stored in the array part; all
+// others go in the hash part, which is first doubled in size if it is full.
+// Returns false only if allocating the larger hash part fails.
+bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
+  assert(upb_arrhas(val));
+  if (key < t->array_size) {
+    assert(!upb_arrhas(t->array[key]));
+    t->array_count++;
+    t->array[key] = val;
+  } else {
+    if (upb_table_isfull(&t->t)) {
+      // Need to resize the hash part, but we re-use the array part.
+      upb_table new_table;
+      if (!upb_table_init(&new_table, t->t.size_lg2 + 1)) return false;
+      upb_tabent *e;
+      for (e = upb_table_begin(&t->t); e; e = upb_table_next(&t->t, e))
+        upb_table_insert(&new_table, e->key, e->val, &upb_inthash, &upb_inteql);
+      upb_table_uninit(&t->t);
+      t->t = new_table;
+    }
+    upb_table_insert(&t->t, upb_intkey(key), val, &upb_inthash, &upb_inteql);
+  }
+  return true;
 }

-static uint32_t empty_strbucket(upb_strtable *table) {
-  // TODO: does it matter that this is biased towards the front of the table?
-  for(uint32_t i = 0; i < upb_strtable_size(table); i++) {
-    upb_strtable_entry *e = strent(table, i);
-    if(!e->hdr.key) return i;
+upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key) {
+  if (key < t->array_size) {
+    upb_value *v = &t->array[key];
+    return upb_arrhas(*v) ? v : NULL;
  }
-  assert(false);
-  return 0;
+  return upb_table_lookup(&t->t, upb_intkey(key), &upb_inthash, &upb_inteql);
 }

-static void strinsert(upb_strtable *t, const char *key, const void *val) {
-  assert(upb_strtable_lookup(t, key) == NULL);
-  t->t.count++;
-  uint32_t bucket = strtable_bucket(t, key);
-  upb_strtable_entry *table_e = strent(t, bucket);
-  if(table_e->hdr.key) {  /* Collision. */
-    if(bucket == strtable_bucket(t, table_e->hdr.key)) {
-      /* Existing element is in its main posisiton.  Find an empty slot to
-       * place our new element and append it to this key's chain. */
-      uint32_t empty_bucket = empty_strbucket(t);
-      while (table_e->hdr.next != UPB_END_OF_CHAIN)
-        table_e = strent(t, table_e->hdr.next);
-      table_e->hdr.next = empty_bucket;
-      table_e = strent(t, empty_bucket);
+bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
+  if (key < t->array_size) {
+    if (upb_arrhas(t->array[key])) {
+      t->array_count--;
+      if (val) *val = t->array[key];
+      t->array[key] = upb_value_uint64(-1);
+      return true;
    } else {
-      /* Existing element is not in its main position.  Move it to an empty
-       * slot and put our element in its main position. */
-      uint32_t empty_bucket = empty_strbucket(t);
-      uint32_t evictee_bucket = strtable_bucket(t, table_e->hdr.key);
-      memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
-      upb_strtable_entry *evictee_e = strent(t, evictee_bucket);
-      while(1) {
-        assert(evictee_e->hdr.key);
-        assert(evictee_e->hdr.next != UPB_END_OF_CHAIN);
-        if(evictee_e->hdr.next == bucket) {
-          evictee_e->hdr.next = empty_bucket;
-          break;
-        }
-        evictee_e = strent(t, evictee_e->hdr.next);
-      }
-      /* table_e remains set to our mainpos. */
+      return false;
    }
+  } else {
+    return upb_table_remove(
+        &t->t, upb_intkey(key), val, &upb_inthash, &upb_inteql);
  }
-  //fprintf(stderr, "val: %p\n", val);
-  //fprintf(stderr, "val size: %d\n", t->t.value_size);
-  memcpy(&table_e->val, val, t->t.value_size);
-  table_e->hdr.key = strdup(key);
-  table_e->hdr.next = UPB_END_OF_CHAIN;
-  //fprintf(stderr, "Looking up, string=%s...\n", key);
-  assert(upb_strtable_lookup(t, key) == &table_e->val);
-  //printf("Yay!\n");
 }

-void upb_strtable_insert(upb_strtable *t, const char *key, const void *val) {
-  if((double)(t->t.count + 1) / upb_strtable_size(t) > MAX_LOAD) {
-    // Need to resize.  New table of double the size, add old elements to it.
-    //printf("RESIZE!!\n");
-    upb_strtable new_table;
-    upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.value_size);
-    upb_strtable_iter i;
-    upb_strtable_begin(&i, t);
-    for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
-      strinsert(&new_table,
-                upb_strtable_iter_key(&i),
-                upb_strtable_iter_value(&i));
-    }
-    upb_strtable_free(t);
-    *t = new_table;
+// Rebuilds the table so that the array part is the largest power-of-two size
+// that still meets MIN_DENSITY, with the remaining keys in a hash part sized
+// for them.  If memory for the new table cannot be allocated, the table is
+// left unchanged.
+void upb_inttable_compact(upb_inttable *t) {
+  // Find the largest power of two that satisfies the MIN_DENSITY definition.
+  // counts[n] is the number of keys for which upb_log2() returns n.
+  int counts[UPB_MAXARRSIZE + 1] = {0};
+  upb_inttable_iter i;
+  for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i))
+    counts[upb_log2(upb_inttable_iter_key(&i))]++;
+  int count = upb_inttable_count(t);
+  int size;
+  for (size = UPB_MAXARRSIZE; size > 1; size--) {
+    // After this subtraction "count" is the number of keys that would land in
+    // an array part of (1 << size) elements.
+    count -= counts[size];
+    if (count >= (1 << size) * MIN_DENSITY) break;
   }
-  strinsert(t, key, val);
+
+  // Insert all elements into new, perfectly-sized table.  "size" is a log2,
+  // whereas upb_inttable_sizedinit() takes the array part's element count.
+  upb_inttable new_table;
+  int hashsize = (upb_inttable_count(t) - count + 1) / MAX_LOAD;
+  if (!upb_inttable_sizedinit(
+          &new_table, (size_t)1 << size, upb_log2(hashsize) + 1)) {
+    return;  // Out of memory; keep the existing (uncompacted) table.
+  }
+  for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i)) {
+    if (!upb_inttable_insert(&new_table, upb_inttable_iter_key(&i),
+                             upb_inttable_iter_value(&i))) {
+      // Out of memory while growing the hash part; discard the partial copy
+      // and keep the existing table.
+      upb_inttable_uninit(&new_table);
+      return;
+    }
+  }
+  upb_inttable_uninit(t);
+  *t = new_table;
 }

-void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
-  i->e = strent(t, -1);
+void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) {
  i->t = t;
-  upb_strtable_next(i);
+  i->arrkey = -1;
+  i->array_part = true;
+  upb_inttable_next(i);
 }

-void upb_strtable_next(upb_strtable_iter *i) {
-  upb_strtable_entry *end = strent(i->t, upb_strtable_size(i->t));
-  upb_strtable_entry *cur = i->e;
-  do {
-    cur = (void*)((char*)cur + i->t->t.entry_size);
-    if(cur == end) { i->e = NULL; return; }
-  } while(cur->hdr.key == NULL);
-  i->e = cur;
+// Advances "iter" to the next element: first through the occupied slots of
+// the array part in increasing index order, then through the hash part.
+void upb_inttable_next(upb_inttable_iter *iter) {
+  const upb_inttable *t = iter->t;
+  if (iter->array_part) {
+    // Resume the scan at the slot after the one last returned.
+    for (size_t i = iter->arrkey; ++i < t->array_size; )
+      if (upb_arrhas(t->array[i])) {
+        iter->ptr.val = &t->array[i];
+        iter->arrkey = i;
+        return;
+      }
+    // Array part exhausted; switch to the hash part, positioned one slot
+    // before its first entry so that upb_table_next() starts at slot 0.
+    iter->array_part = false;
+    iter->ptr.ent = t->t.entries - 1;
+  }
+  iter->ptr.ent = upb_table_next(&t->t, iter->ptr.ent);
 }

 #ifdef UPB_UNALIGNED_READS_OK
@ -413,8 +351,7 @@ void upb_strtable_next(upb_strtable_iter *i) {
 //   1. It will not work incrementally.
 //   2. It will not produce the same results on little-endian and big-endian
 //      machines.
-static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed)
-{
+static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
  // 'm' and 'r' are mixing constants generated offline.
  // They're not really 'magic', they just happen to work well.
  const uint32_t m = 0x5bd1e995;
@ -465,8 +402,7 @@ static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed)

 #define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }

-static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed)
-{
+static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;
  const uint8_t * data = (const uint8_t *)key;
--- a/upb/table.h
+++ b/upb/table.h
@ -4,13 +4,16 @@
 * Copyright (c) 2009 Google Inc.  See LICENSE for details.
 * Author: Josh Haberman <jhaberman@gmail.com>
 *
- * This file defines very fast int->struct (inttable) and string->struct
- * (strtable) hash tables.  The struct can be of any size, and it is stored
- * in the table itself, for cache-friendly performance.
+ * This file defines very fast int->upb_value (inttable) and string->upb_value
+ * (strtable) hash tables.
 *
- * The table uses internal chaining with Brent's variation (inspired by the
- * Lua implementation of hash tables).  The hash function for strings is
- * Austin Appleby's "MurmurHash."
+ * The table uses chained scatter with Brent's variation (inspired by the Lua
+ * implementation of hash tables).  The hash function for strings is Austin
+ * Appleby's "MurmurHash."
+ *
+ * The inttable uses uintptr_t as its key, which guarantees it can be used to
+ * store pointers or integers of at least 32 bits (upb isn't really useful on
+ * systems where sizeof(void*) < 4).
 *
 * This header is internal to upb; its interface should not be considered
 * public or stable.
@ -19,52 +22,30 @@
 #ifndef UPB_TABLE_H_
 #define UPB_TABLE_H_

-#include <assert.h>
 #include <stddef.h>
+#include <stdint.h>
 #include "upb.h"

 #ifdef __cplusplus
 extern "C" {
 #endif

-#define UPB_END_OF_CHAIN (uint32_t)-1
-
-typedef struct {
-  bool has_entry:1;
-  // The rest of the bits are the user's.
-} upb_inttable_value;
-
-typedef struct {
-  uint32_t key;
-  uint32_t next;  // Internal chaining.
-} upb_inttable_header;
-
-typedef struct {
-  upb_inttable_header hdr;
-  upb_inttable_value val;
-} upb_inttable_entry;
-
-// TODO: consider storing the hash in the entry.  This would avoid the need to
-// rehash on table resizes, but more importantly could possibly improve lookup
-// performance by letting us compare hashes before comparing lengths or the
-// strings themselves.
-typedef struct {
-  char *key;         // We own, nullz. TODO: store explicit len?
-  uint32_t next;     // Internal chaining.
-} upb_strtable_header;
+typedef union {
+  uintptr_t num;
+  char *str;  // We own, nullz.
+} upb_tabkey;

-typedef struct {
-  upb_strtable_header hdr;
-  uint32_t val;      // Val is at least 32 bits.
-} upb_strtable_entry;
+typedef struct _upb_tabent {
+  upb_tabkey key;
+  upb_value val;
+  struct _upb_tabent *next;  // Internal chaining.
+} upb_tabent;

 typedef struct {
-  void *entries;        // Hash table.
-  uint32_t count;       // Number of entries in the hash part.
-  uint32_t mask;        // Mask to turn hash value -> bucket.
-  uint16_t entry_size;  // Size of each entry.
-  uint16_t value_size;  // Size of each value.
-  uint8_t size_lg2;     // Size of the hash table part is 2^size_lg2 entries.
+  upb_tabent *entries;   // Hash table.
+  size_t count;          // Number of entries in the hash part.
+  size_t mask;           // Mask to turn hash value -> bucket.
+  uint8_t size_lg2;      // Size of the hash table part is 2^size_lg2 entries.
 } upb_table;

 typedef struct {
@ -72,149 +53,124 @@ typedef struct {
 } upb_strtable;

 typedef struct {
-  upb_table t;
-  void *array;           // Array part of the table.
-  uint32_t array_size;   // Array part size.
-  uint32_t array_count;  // Array part number of elements.
+  upb_table t;           // For entries that don't fit in the array part.
+  upb_value *array;      // Array part of the table.
+  size_t array_size;     // Array part size.
+  size_t array_count;    // Array part number of elements.
 } upb_inttable;

-// Initialize and free a table, respectively.  Specify the initial size
-// with 'size' (the size will be increased as necessary).  Value size
-// specifies how many bytes each value in the table is.
-//
-// WARNING!  The lowest bit of every entry is reserved by the hash table.
-// It will always be overwritten when you insert, and must not be modified
-// when looked up!
-void upb_inttable_init(upb_inttable *table, uint32_t size, uint16_t value_size);
-void upb_inttable_free(upb_inttable *table);
-void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t value_size);
-void upb_strtable_free(upb_strtable *table);
-
-// Number of values in the hash table.
-INLINE uint32_t upb_table_count(const upb_table *t) { return t->count; }
-INLINE uint32_t upb_inttable_count(const upb_inttable *t) {
-  return t->array_count + upb_table_count(&t->t);
-}
-INLINE uint32_t upb_strtable_count(const upb_strtable *t) {
-  return upb_table_count(&t->t);
+INLINE upb_tabkey upb_intkey(uintptr_t key) { upb_tabkey k = {key}; return k; }
+
+INLINE upb_tabent *upb_inthash(const upb_table *t, upb_tabkey key) {
+  return t->entries + ((uint32_t)key.num & t->mask);  // Identity hash.
 }

-// Inserts the given key into the hashtable with the given value.  The key must
-// not already exist in the hash table.  The data will be copied from val into
-// the hashtable (the amount of data copied comes from value_size when the
-// table was constructed).  Therefore the data at val may be freed once the
-// call returns.  For string tables, the table takes ownership of the string.
-//
-// WARNING: the lowest bit of val is reserved and will be overwritten!
-void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val);
-// TODO: may want to allow for more complex keys with custom hash/comparison
-// functions.
-void upb_strtable_insert(upb_strtable *t, const char *key, const void *val);
-void upb_inttable_compact(upb_inttable *t);
+INLINE bool upb_arrhas(upb_value v) { return v.val.uint64 != (uint64_t)-1; }

-INLINE uint32_t _upb_inttable_bucket(const upb_inttable *t, uint32_t k) {
-  uint32_t bucket = k & t->t.mask;  // Identity hash for ints.
-  assert(bucket != UPB_END_OF_CHAIN);
-  return bucket;
-}
+// Initialize and uninitialize a table, respectively.  If memory allocation
+// fails, false is returned and the table is left uninitialized.
+bool upb_inttable_init(upb_inttable *table);
+bool upb_strtable_init(upb_strtable *table);
+void upb_inttable_uninit(upb_inttable *table);
+void upb_strtable_uninit(upb_strtable *table);

-// Returns true if this key belongs in the array part of the table.
-INLINE bool _upb_inttable_isarrkey(const upb_inttable *t, uint32_t k) {
-  return (k < t->array_size);
-}
+// Returns the number of values in the table.
+size_t upb_inttable_count(const upb_inttable *t);
+INLINE size_t upb_strtable_count(const upb_strtable *t) { return t->t.count; }

-// Looks up key in this table, returning a pointer to the user's inserted data.
-// We have the caller specify the entry_size because fixing this as a literal
-// (instead of reading table->entry_size) gives the compiler more ability to
-// optimize.
+// Inserts the given key into the hashtable with the given value.  The key must
+// not already exist in the hash table.  For string tables, the key must be
+// NUL-terminated, and the table will make an internal copy of the key.
+// Inttables must not insert a value of UINTPTR_MAX.
 //
-// Note: All returned pointers are invalidated by inserts!
-INLINE void *_upb_inttable_fastlookup(const upb_inttable *t, uint32_t key,
-                                      size_t entry_size, size_t value_size) {
-  upb_inttable_value *arrval =
-      (upb_inttable_value*)UPB_INDEX(t->array, key, value_size);
-  if (_upb_inttable_isarrkey(t, key)) {
-    return (arrval->has_entry) ? arrval : NULL;
+// If a table resize was required but memory allocation failed, false is
+// returned and the table is unchanged.
+bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val);
+bool upb_strtable_insert(upb_strtable *t, const char *key, upb_value val);
+
+// Looks up key in this table, returning a pointer to the table's internal copy
+// of the user's inserted data, or NULL if this key is not in the table.  The
+// user is free to modify the given upb_value, which will be reflected in any
+// future lookups of this key.  The returned pointer is invalidated by inserts.
+upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key);
+upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key);
+
+// Removes an item from the table.  Returns true if the remove was successful,
+// and stores the removed item in *val if non-NULL.
+bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val);
+
+// Optimizes the table for the current set of entries, for both memory use and
+// lookup time.  Client should call this after all entries have been inserted;
+// inserting more entries is legal, but will likely require a table resize.
+void upb_inttable_compact(upb_inttable *t);
+
+// A special-case inlinable version of the lookup routine for 32-bit integers.
+INLINE upb_value *upb_inttable_lookup32(const upb_inttable *t, uint32_t key) {
+  if (key < t->array_size) {  // Keys below array_size use the array part.
+    upb_value *v = &t->array[key];
+    return upb_arrhas(*v) ? v : NULL;  // NULL if upb_arrhas() rejects it.
  }
-  uint32_t bucket = _upb_inttable_bucket(t, key);
-  upb_inttable_entry *e =
-      (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size);
-  while (1) {
-    if (e->hdr.key == key) {
-      return &e->val;
-    }
-    if ((bucket = e->hdr.next) == UPB_END_OF_CHAIN) return NULL;
-    e = (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size);
+  for (upb_tabent *e = upb_inthash(&t->t, upb_intkey(key)); true; e = e->next) {
+    if ((uint32_t)e->key.num == key) return &e->val;  // Match on low 32 bits.
+    if (e->next == NULL) return NULL;  // End of chain: key not present.
  }
 }

-INLINE size_t _upb_inttable_entrysize(size_t value_size) {
-  return upb_align_up(sizeof(upb_inttable_header) + value_size, 8);
-}
-
-INLINE void *upb_inttable_fastlookup(const upb_inttable *t, uint32_t key,
-                                     uint32_t value_size) {
-  return _upb_inttable_fastlookup(
-      t, key, _upb_inttable_entrysize(value_size), value_size);
-}
-
-INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) {
-  return _upb_inttable_fastlookup(t, key, t->t.entry_size, t->t.value_size);
-}
-
-void *upb_strtable_lookupl(const upb_strtable *t, const char *key, size_t len);
-void *upb_strtable_lookup(const upb_strtable *t, const char *key);
-

 /* upb_strtable_iter **********************************************************/

 // Strtable iteration.  Order is undefined.  Insertions invalidate iterators.
 //   upb_strtable_iter i;
-//   for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i)) {
+//   upb_strtable_begin(&i, t);
+//   for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
 //     const char *key = upb_strtable_iter_key(&i);
-//     const myval *val = upb_strtable_iter_value(&i);
+//     upb_value val = upb_strtable_iter_value(&i);
 //     // ...
 //   }
 typedef struct {
  const upb_strtable *t;
-  upb_strtable_entry *e;
+  upb_tabent *e;  // Current entry; NULL once iteration is done.
 } upb_strtable_iter;

 void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t);
 void upb_strtable_next(upb_strtable_iter *i);
 INLINE bool upb_strtable_done(upb_strtable_iter *i) { return i->e == NULL; }
 INLINE const char *upb_strtable_iter_key(upb_strtable_iter *i) {
-  return i->e->hdr.key;
+  return i->e->key.str;
 }
-INLINE const void *upb_strtable_iter_value(upb_strtable_iter *i) {
-  return &i->e->val;
+INLINE upb_value upb_strtable_iter_value(upb_strtable_iter *i) {
+  return i->e->val;
 }


 /* upb_inttable_iter **********************************************************/

 // Inttable iteration.  Order is undefined.  Insertions invalidate iterators.
-//   for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
-//       i = upb_inttable_next(t, i)) {
+//   upb_inttable_iter i;
+//   upb_inttable_begin(&i, t);
+//   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
 //     // ...
 //   }
 typedef struct {
-  uint32_t key;
-  upb_inttable_value *value;
+  const upb_inttable *t;  // Table passed to upb_inttable_begin().
+  union {
+    upb_tabent *ent;  // For hash iteration.
+    upb_value *val;   // For array iteration.
+  } ptr;  // upb_inttable_done() reports done when ptr.ent is NULL.
+  uintptr_t arrkey;  // Key reported while array_part is true.
  bool array_part;
 } upb_inttable_iter;

-upb_inttable_iter upb_inttable_begin(const upb_inttable *t);
-upb_inttable_iter upb_inttable_next(const upb_inttable *t,
-                                    upb_inttable_iter iter);
-INLINE bool upb_inttable_done(upb_inttable_iter iter) {
-  return iter.value == NULL;
+void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t);
+void upb_inttable_next(upb_inttable_iter *i);
+INLINE bool upb_inttable_done(upb_inttable_iter *i) {
+  return i->ptr.ent == NULL;
 }
-INLINE uint32_t upb_inttable_iter_key(upb_inttable_iter iter) {
-  return iter.key;
+INLINE uintptr_t upb_inttable_iter_key(upb_inttable_iter *i) {
+  return i->array_part ? i->arrkey : i->ptr.ent->key.num;
 }
-INLINE void *upb_inttable_iter_value(upb_inttable_iter iter) {
-  return iter.value;
+INLINE upb_value upb_inttable_iter_value(upb_inttable_iter *i) {
+  return i->array_part ? *i->ptr.val : i->ptr.ent->val;
 }

 #ifdef __cplusplus
--- a/upb/upb.c
+++ b/upb/upb.c
@ -1,47 +1,17 @@
 /*
 * upb - a minimalist implementation of protocol buffers.
 *
- * Copyright (c) 2009 Google Inc.  See LICENSE for details.
+ * Copyright (c) 2009-2012 Google Inc.  See LICENSE for details.
 * Author: Josh Haberman <jhaberman@gmail.com>
 */

 #include <errno.h>
 #include <stdarg.h>
 #include <stddef.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include "upb/descriptor_const.h"
 #include "upb/upb.h"
-#include "upb/bytestream.h"
-
-#define alignof(t) offsetof(struct { char c; t x; }, x)
-#define TYPE_INFO(wire_type, ctype, inmemory_type, is_numeric) \
-    {alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), \
-     #ctype, is_numeric},
-
-const upb_type_info upb_types[] = {
-  // END_GROUP is not real, but used to signify the pseudo-field that
-  // ends a group from within the group.
-  TYPE_INFO(UPB_WIRE_TYPE_END_GROUP,   void*,     MESSAGE, false)   // ENDGROUP
-  TYPE_INFO(UPB_WIRE_TYPE_64BIT,       double,    DOUBLE,  true)    // DOUBLE
-  TYPE_INFO(UPB_WIRE_TYPE_32BIT,       float,     FLOAT,   true)    // FLOAT
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int64_t,   INT64,   true)    // INT64
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      uint64_t,  UINT64,  true)    // UINT64
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int32_t,   INT32,   true)    // INT32
-  TYPE_INFO(UPB_WIRE_TYPE_64BIT,       uint64_t,  UINT64,  true)    // FIXED64
-  TYPE_INFO(UPB_WIRE_TYPE_32BIT,       uint32_t,  UINT32,  true)    // FIXED32
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      bool,      BOOL,    true)    // BOOL
-  TYPE_INFO(UPB_WIRE_TYPE_DELIMITED,   void*,     STRING,  false)   // STRING
-  TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*,     MESSAGE, false)   // GROUP
-  TYPE_INFO(UPB_WIRE_TYPE_DELIMITED,   void*,     MESSAGE, false)   // MESSAGE
-  TYPE_INFO(UPB_WIRE_TYPE_DELIMITED,   void*,     STRING,  false)   // BYTES
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      uint32_t,  UINT32,  true)    // UINT32
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      uint32_t,  INT32,   true)    // ENUM
-  TYPE_INFO(UPB_WIRE_TYPE_32BIT,       int32_t,   INT32,   true)    // SFIXED32
-  TYPE_INFO(UPB_WIRE_TYPE_64BIT,       int64_t,   INT64,   true)    // SFIXED64
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int32_t,   INT32,   true)    // SINT32
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int64_t,   INT64,   true)    // SINT64
-};

 #ifdef NDEBUG
 upb_value UPB_NO_VALUE = {{0}};
@ -142,8 +112,9 @@ bool upb_errno_is_wouldblock() {
 bool upb_posix_codetostr(int code, char *buf, size_t len) {
  if (strerror_r(code, buf, len) == -1) {
    if (errno == EINVAL) {
-      int n = snprintf(buf, len, "Invalid POSIX error number %d\n", code);
-      return n >= (int)len;
+      size_t actual_len =
+          snprintf(buf, len, "Invalid POSIX error number %d\n", code);
+      return actual_len >= len;
    } else if (errno == ERANGE) {
      return false;
    }
--- a/upb/upb.h
+++ b/upb/upb.h
@ -15,9 +15,6 @@
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
-#include <string.h>
-#include "descriptor_const.h"
-#include "atomic.h"

 #ifdef __cplusplus
 extern "C" {
@ -36,20 +33,6 @@ extern "C" {

 #define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
 #define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
-#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m)))
-
-INLINE void nop_printf(const char *fmt, ...) { (void)fmt; }
-
-#ifdef NDEBUG
-#define DEBUGPRINTF nop_printf
-#else
-#define DEBUGPRINTF printf
-#endif
-
-// Rounds val up to the next multiple of align.
-INLINE uint32_t upb_align_up(uint32_t val, uint32_t align) {
-  return val % align == 0 ? val : val + align - (val % align);
-}

 // The maximum that any submessages can be nested.  Matches proto2's limit.
 // At the moment this specifies the size of several statically-sized arrays
@ -94,73 +77,46 @@ INLINE uint32_t upb_align_up(uint32_t val, uint32_t align) {
 #define UPB_MAX_TYPE_DEPTH 64


-/* Fundamental types and type constants. **************************************/
-
-// A list of types as they are encoded on-the-wire.
-enum upb_wire_type {
-  UPB_WIRE_TYPE_VARINT      = 0,
-  UPB_WIRE_TYPE_64BIT       = 1,
-  UPB_WIRE_TYPE_DELIMITED   = 2,
-  UPB_WIRE_TYPE_START_GROUP = 3,
-  UPB_WIRE_TYPE_END_GROUP   = 4,
-  UPB_WIRE_TYPE_32BIT       = 5,
-};
-
-// Type of a field as defined in a .proto file.  eg. string, int32, etc.  The
-// integers that represent this are defined by descriptor.proto.  Note that
-// descriptor.proto reserves "0" for errors, and we use it to represent
-// exceptional circumstances.
-typedef uint8_t upb_fieldtype_t;
-
-// For referencing the type constants tersely.
-#define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_ ## type
-#define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_ ## type
-
-// Info for a given field type.
-typedef struct {
-  uint8_t align;
-  uint8_t size;
-  uint8_t native_wire_type;
-  uint8_t inmemory_type;    // For example, INT32, SINT32, and SFIXED32 -> INT32
-  const char *ctype;
-  bool is_numeric;  // Only numeric types can be packed.
-} upb_type_info;
-
-// A static array of info about all of the field types, indexed by type number.
-extern const upb_type_info upb_types[];
-
-
 /* upb_value ******************************************************************/

+// Clients should not need to access these enum values; they are used internally
+// to do typechecks of upb_value accesses.
+typedef enum {
+  UPB_CTYPE_INT32 = 1,
+  UPB_CTYPE_INT64 = 2,
+  UPB_CTYPE_UINT32 = 3,
+  UPB_CTYPE_UINT64 = 4,
+  UPB_CTYPE_DOUBLE = 5,
+  UPB_CTYPE_FLOAT = 6,
+  UPB_CTYPE_BOOL = 7,
+  UPB_CTYPE_PTR = 8,
+  UPB_CTYPE_BYTEREGION = 9,
+  UPB_CTYPE_FIELDDEF = 10,
+} upb_ctype_t;
+
 struct _upb_byteregion;
 struct _upb_fielddef;

-// Special constants for the upb_value.type field.  These must not conflict
-// with any members of FieldDescriptorProto.Type.
-#define UPB_TYPE_ENDGROUP 0
-#define UPB_VALUETYPE_FIELDDEF 32
-#define UPB_VALUETYPE_PTR 33
-
 // A single .proto value.  The owner must have an out-of-band way of knowing
 // the type, so that it knows which union member to use.
 typedef struct {
  union {
    uint64_t uint64;
-    double _double;
-    float _float;
    int32_t int32;
    int64_t int64;
    uint32_t uint32;
+    double _double;
+    float _float;
    bool _bool;
+    void *_void;
    struct _upb_byteregion *byteregion;
    const struct _upb_fielddef *fielddef;
-    void *_void;
  } val;

 #ifndef NDEBUG
  // In debug mode we carry the value type around also so we can check accesses
  // to be sure the right member is being read.
-  char type;
+  upb_ctype_t type;
 #endif
 } upb_value;

@ -185,7 +141,7 @@ typedef struct {
    return val.val.membername; \
  } \
  INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \
-    memset(val, 0, sizeof(*val)); \
+    val->val.uint64 = 0; \
    SET_TYPE(val->type, proto_type); \
    val->val.membername = cval; \
  } \
@ -195,21 +151,23 @@ typedef struct {
    return ret; \
  }

-UPB_VALUE_ACCESSORS(double, _double, double, UPB_TYPE(DOUBLE));
-UPB_VALUE_ACCESSORS(float, _float, float, UPB_TYPE(FLOAT));
-UPB_VALUE_ACCESSORS(int32, int32, int32_t, UPB_TYPE(INT32));
-UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64));
-UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32));
-UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64));
-UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL));
-UPB_VALUE_ACCESSORS(ptr, _void, void*, UPB_VALUETYPE_PTR);
+UPB_VALUE_ACCESSORS(int32,  int32,   int32_t,  UPB_CTYPE_INT32);
+UPB_VALUE_ACCESSORS(int64,  int64,   int64_t,  UPB_CTYPE_INT64);
+UPB_VALUE_ACCESSORS(uint32, uint32,  uint32_t, UPB_CTYPE_UINT32);
+UPB_VALUE_ACCESSORS(uint64, uint64,  uint64_t, UPB_CTYPE_UINT64);
+UPB_VALUE_ACCESSORS(double, _double, double,   UPB_CTYPE_DOUBLE);
+UPB_VALUE_ACCESSORS(float,  _float,  float,    UPB_CTYPE_FLOAT);
+UPB_VALUE_ACCESSORS(bool,   _bool,   bool,     UPB_CTYPE_BOOL);
+UPB_VALUE_ACCESSORS(ptr,    _void,   void*,    UPB_CTYPE_PTR);
 UPB_VALUE_ACCESSORS(byteregion, byteregion, struct _upb_byteregion*,
-                    UPB_TYPE(STRING));
+                    UPB_CTYPE_BYTEREGION);

 // upb_fielddef should never be modified from a callback
 // (ie. when they're getting passed through a upb_value).
 UPB_VALUE_ACCESSORS(fielddef, fielddef, const struct _upb_fielddef*,
-                    UPB_VALUETYPE_FIELDDEF);
+                    UPB_CTYPE_FIELDDEF);
+
+#undef UPB_VALUE_ACCESSORS

 extern upb_value UPB_NO_VALUE;

@ -262,7 +220,7 @@ void upb_status_copy(upb_status *to, const upb_status *from);

 extern upb_errorspace upb_posix_errorspace;
 void upb_status_fromerrno(upb_status *status);
-bool upb_errno_is_wouldblock(void);
+bool upb_errno_is_wouldblock(void);  // (void): "()" is not a prototype in C.

 // Like vasprintf (which allocates a string large enough for the result), but
 // uses *buf (which can be NULL) as a starting point and reallocates it only if