Sync with 8 months of Google-internal development.

Many things have changed and been simplified. The memory-management story for upb_def and upb_handlers is much more robust; upb_def and upb_handlers should be fairly stable interfaces now. There is still much work to do for the runtime component (upb_sink).
12 years ago · 7d3e2bd2c4
parent ea198bdcf9
commit 7d3e2bd2c4
88 changed files with 11187 additions and 7007 deletions
--- a/42
+++ b/42
@ -51,6 +51,7 @@ CXXFLAGS=-Ibindings/cpp
 INCLUDE=-Itests -I.
 CPPFLAGS=$(INCLUDE) -Wall -Wextra $(USER_CFLAGS)
 LDLIBS=-lpthread upb/libupb.a
+LUA=lua5.1  # 5.1 and 5.2 should both be supported

 # Build with "make Q=" to see all commands that are being executed.
 Q=@
@ -84,23 +85,24 @@ CORE= \
  upb/bytestream.c \
  upb/def.c \
  upb/descriptor/reader.c \
+  upb/descriptor/descriptor.upb.c \
+  upb/google/bridge.cc \
+  upb/google/proto2.cc \
  upb/handlers.c \
-  upb/msg.c \
-  upb/refcount.c \
-  upb/stdc/error.c \
-  upb/stdc/io.c \
+  upb/refcounted.c \
+  upb/sink.c \
+  upb/symtab.c \
  upb/table.c \
  upb/upb.c \
-  bindings/cpp/upb/proto2_bridge.cc \

 # TODO: the proto2 bridge should be built as a separate library.

 # Library for the protocol buffer format (both text and binary).
 PB= \
  upb/pb/decoder.c \
-  upb/pb/varint.c \
  upb/pb/glue.c \
  upb/pb/textprinter.c \
+  upb/pb/varint.c \


 # Rules. #######################################################################
@ -170,7 +172,7 @@ upb/def.lo: upb/def.c

 upb/pb/decoder_x64.h: upb/pb/decoder_x64.dasc
 	$(E) DYNASM $<
-	$(Q) lua dynasm/dynasm.lua upb/pb/decoder_x64.dasc > upb/pb/decoder_x64.h
+	$(Q) $(LUA) dynasm/dynasm.lua upb/pb/decoder_x64.dasc > upb/pb/decoder_x64.h

 ifneq ($(shell uname), Darwin)
 upb/pb/jit_debug_elf_file.o: upb/pb/jit_debug_elf_file.s
@ -214,24 +216,36 @@ SIMPLE_TESTS= \
  tests/test_varint \

 SIMPLE_CXX_TESTS= \
-  tests/test_table \
  tests/test_cpp \
-  tests/test_decoder \
+
+  # The build process for this test is complicated and hasn't been
+  # ported to the open-source Makefile yet.
+  # tests/test_decoder \

 VARIADIC_TESTS= \
  tests/t.test_vs_proto2.googlemessage1 \
  tests/t.test_vs_proto2.googlemessage2 \

-TESTS=$(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS) $(VARIADIC_TESTS)
+TESTS=$(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS) $(VARIADIC_TESTS) tests/test_table


 tests: $(TESTS) $(INTERACTIVE_TESTS)
 $(TESTS): $(LIBUPB)
 tests/test_def: tests/test.proto.pb

+tests/testmain.o: tests/testmain.cc
+	$(E) CXX $<
+	$(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $<
+
+$(SIMPLE_TESTS): tests/testmain.o
 $(SIMPLE_TESTS): % : %.c
 	$(E) CC $<
-	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ $< $(LIBUPB)
+	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ tests/testmain.o $< $(LIBUPB)
+
+$(SIMPLE_CXX_TESTS): tests/testmain.o
+$(SIMPLE_CXX_TESTS): % : %.cc
+	$(E) CXX $<
+	$(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -o $@ tests/testmain.o $< $(LIBUPB)

 VALGRIND=valgrind --leak-check=full --error-exitcode=1
 test: tests
@ -258,7 +272,7 @@ tests/t.test_vs_proto2.googlemessage2: \
 	  -DMESSAGE_FILE=\"../benchmarks/google_message1.dat\" \
 	  -DMESSAGE_CIDENT="benchmarks::SpeedMessage1" \
 	  -DMESSAGE_HFILE=\"../benchmarks/google_messages.pb.h\" \
-	  benchmarks/google_messages.pb.cc -lprotobuf -lpthread $(LIBUPB)
+	  benchmarks/google_messages.pb.cc tests/testmain.o -lprotobuf -lpthread $(LIBUPB)
 	$(E) CXX $< '(benchmarks::SpeedMessage2)'
 	$(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -o tests/t.test_vs_proto2.googlemessage2 $< \
 	  -DMESSAGE_NAME=\"benchmarks.SpeedMessage2\" \
@ -266,11 +280,11 @@ tests/t.test_vs_proto2.googlemessage2: \
 	  -DMESSAGE_FILE=\"../benchmarks/google_message2.dat\" \
 	  -DMESSAGE_CIDENT="benchmarks::SpeedMessage2" \
 	  -DMESSAGE_HFILE=\"../benchmarks/google_messages.pb.h\" \
-	  benchmarks/google_messages.pb.cc -lprotobuf -lpthread $(LIBUPB)
+	  benchmarks/google_messages.pb.cc tests/testmain.o -lprotobuf -lpthread $(LIBUPB)
 tests/test_table: tests/test_table.cc
 	@# Includes <hash_set> which is a deprecated header.
 	$(E) CXX $<
-	$(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -Wno-deprecated -o $@ $< $(LIBUPB)
+	$(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -Wno-deprecated -o $@ $< tests/testmain.o $(LIBUPB)

 tests/tests: upb/libupb.a

--- a/83
+++ b/83
@ -34,6 +34,89 @@ the major things that are broken or not yet implemented yet:
 - serialization isn't written yet (only deserialization)


+C/C++ API
+=========
+
+upb's main interfaces are defined in .h files (like upb/def.h).  These header
+files are coded in such a way that they are not only compatible with C and C++
+but provide idiomatic interfaces to both (functions for C, classes for C++).
+
+Here is the general strategy/pattern for this.  I'll explain it piece by piece.
+
+// This defines a type called upb::Foo in C++ or upb_foo in C.  In both cases
+// there is a typedef for upb_foo, which is important since this is how the
+// C functions are defined (which are exposed to both C and C++).
+
+#ifdef __cplusplus
+namespace upb { class Foo; }
+typedef upb::Foo upb_foo;
+extern "C" {
+#else
+struct upb_foo;
+typedef struct upb_foo upb_foo;
+#endif
+
+// Here is the actual definition of the class/struct.  In C++ we get a class
+// called upb::Foo and in C we get a struct called "struct upb_foo", but both
+// have the same members and the C++ version is "standard-layout" according
+// to C++11.  This means that the two should be compatible.
+//
+// In addition to being completely accessible from C, it also provides C++
+// niceties like methods (instead of bare functions).  We also get
+// encapsulation in C++, even though this is impossible to provide in C.  We
+// provide all method documentation in the C++ class, since the class/method
+// syntax is nicer to read than the bare functions of C.
+
+#ifdef __cplusplus
+
+class upb::Foo {
+ public:
+  // Method documentation for DoBar().
+  void DoBar(int32_t x);
+
+  // Method documentation for IsSpicy().
+  bool IsSpicy();
+
+ private:
+
+#else
+struct upb_foo {
+#endif
+  int32_t private_member;
+};
+
+// Next follows the C API, which is how the functionality is actually
+// implemented.  We omit documentation here because everything was documented
+// in the C++ class, and it's easy to match the functions 1:1 to the C++
+// methods.
+void upb_foo_dobar(upb_foo *f, int32_t x);
+bool upb_foo_isspicy(upb_foo *f);
+
+// Finally we include inline definitions of the C++ methods, which are nothing
+// but thin wrappers around the C functions.  Since these are inline, the C++
+// API imposes no overhead.
+
+#ifdef __cplusplus
+}  // extern "C"
+
+namespace upb {
+inline void Foo::DoBar(int32_t x) { upb_foo_dobar(this, x); }
+inline bool Foo::IsSpicy() { return upb_foo_isspicy(this); }
+}
+#endif
+
+This scheme works pretty nicely.  It adds a bit of noise to the header file, but
+gives nice, zero-overhead APIs to both C and C++ without having to duplicate
+the API documentation.
+
+The biggest bummer is that there isn't any good way to use C++ inheritance
+even for types which are trying to express inheritance in C.  C++ just doesn't
+give any guarantees about how it will arrange data members in base classes,
+so we can't use C++ inheritance while interoperating with C layouts.  The
+biggest effect of this is that we can't get C++'s nice implicit upcasts; all
+upcasts have to be explicit, which is a pain.
+
+
 CONTACT
 =======

--- a/benchmarks/parsestream.upb.c
+++ b/benchmarks/parsestream.upb.c
@ -27,11 +27,16 @@ static upb_flow_t value(void *closure, upb_value fval, upb_value val) {
  return UPB_CONTINUE;
 }

+void onfreg(void *c, upb_fhandlers *fh, const upb_fielddef *f) {
+  upb_fhandlers_setvalue(fh, &value);
+  upb_fhandlers_setstartsubmsg(fh, &startsubmsg);
+}
+
 static bool initialize()
 {
  // Initialize upb state, decode descriptor.
  upb_status status = UPB_STATUS_INIT;
-  upb_symtab *s = upb_symtab_new(&s);
+  upb_symtab *s = upb_symtab_new();
  upb_load_descriptor_file_into_symtab(s, MESSAGE_DESCRIPTOR_FILE, &status);
  if(!upb_ok(&status)) {
    fprintf(stderr, "Error reading descriptor: %s\n",
@ -44,7 +49,7 @@ static bool initialize()
    fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME);
    return false;
  }
-  upb_symtab_unref(s, &s);
+  upb_symtab_unref(s);

  // Read the message data itself.
  input_str = upb_readfile(MESSAGE_FILE, &input_len);
@ -55,8 +60,7 @@ static bool initialize()

  upb_handlers *handlers = upb_handlers_new();
  // Cause all messages to be read, but do nothing when they are.
-  upb_handlerset hset = {NULL, NULL, value, startsubmsg, NULL, NULL, NULL};
-  upb_handlers_reghandlerset(handlers, def, &hset);
+  upb_handlers_regmsgdef(handlers, def, NULL, &upb_onfreg_hset, NULL);
  upb_decoder_init(&decoder);
  plan = upb_decoderplan_new(handlers, JIT);
  upb_decoder_resetplan(&decoder, plan, 0);
--- a/bindings/cpp/upb/bytestream.cc
+++ b/bindings/cpp/upb/bytestream.cc
@ -1,39 +0,0 @@
-//
-// upb - a minimalist implementation of protocol buffers.
-//
-// Copyright (c) 2011 Google Inc.  See LICENSE for details.
-// Author: Josh Haberman <jhaberman@gmail.com>
-
-#include "bytestream.hpp"
-
-namespace upb {
-
-upb_bytesrc_vtbl* ByteSourceBase::vtable() {
-  static upb_bytesrc_vtbl vtbl = {
-    &ByteSourceBase::VFetch,
-    &ByteSourceBase::VDiscard,
-    &ByteSourceBase::VCopy,
-    &ByteSourceBase::VGetPtr,
-  };
-  return &vtbl;
-}
-
-upb_bytesuccess_t ByteSourceBase::VFetch(void *src, uint64_t ofs, size_t *len) {
-  return static_cast<ByteSourceBase*>(src)->Fetch(ofs, len);
-}
-
-void ByteSourceBase::VCopy(
-    const void *src, uint64_t ofs, size_t len, char* dest) {
-  static_cast<const ByteSourceBase*>(src)->Copy(ofs, len, dest);
-}
-
-void ByteSourceBase::VDiscard(void *src, uint64_t ofs) {
-  static_cast<ByteSourceBase*>(src)->Discard(ofs);
-}
-
-const char * ByteSourceBase::VGetPtr(
-    const void *src, uint64_t ofs, size_t* len) {
-  return static_cast<const ByteSourceBase*>(src)->GetPtr(ofs, len);
-}
-
-}  // namespace upb
--- a/bindings/cpp/upb/bytestream.hpp
+++ b/bindings/cpp/upb/bytestream.hpp
@ -1,276 +0,0 @@
-//
-// upb - a minimalist implementation of protocol buffers.
-//
-// Copyright (c) 2011 Google Inc.  See LICENSE for details.
-// Author: Josh Haberman <jhaberman@gmail.com>
-//
-// This file defines three core interfaces:
-// - upb::ByteSink: for writing streams of data.
-// - upb::ByteSource: for reading streams of data.
-// - upb::ByteRegion: for reading from a specific region of a ByteSource;
-//   should be used by decoders instead of using a ByteSource directly.
-//
-// These interfaces are used by streaming encoders and decoders: for example, a
-// protobuf parser gets its input from a upb::ByteRegion.  They are virtual
-// base classes so concrete implementations can get the data from a fd, a
-// FILE*, a string, etc.
-//
-// A ByteRegion represents a region of data from a ByteSource.
-//
-// Parsers get data from this interface instead of a bytesrc because we often
-// want to parse only a specific region of the input.  For example, if we parse
-// a string from our input but know that the string represents a protobuf, we
-// can pass its ByteRegion to an appropriate protobuf parser.
-//
-// Since the bytes may be coming from a file or network socket, bytes must be
-// fetched before they can be read (though in some cases this fetch may be a
-// no-op).  "fetch" is the only operation on a byteregion that could fail or
-// block, because it is the only operation that actually performs I/O.
-//
-// Bytes can be discarded when they are no longer needed.  Parsers should
-// always discard bytes they no longer need, both so the buffers can be freed
-// when possible and to give better visibility into what bytes the parser is
-// still using.
-//
-// start      discard                     read             fetch             end
-// ofs          ofs                       ofs               ofs              ofs
-// |             |--->Discard()            |                 |--->Fetch()      |
-// V             V                         V                 V                 V
-// +-------------+-------------------------+-----------------+-----------------+
-// |  discarded  |                         |                 |    fetchable    |
-// +-------------+-------------------------+-----------------+-----------------+
-//               | <------------- loaded ------------------> |
-//                                         | <- available -> |
-//                                         | <---------- remaining ----------> |
-//
-// Note that the start offset may be something other than zero!  A byteregion
-// is a view into an underlying bytesrc stream, and the region may start
-// somewhere other than the beginning of that stream.
-//
-// The region can be either delimited or nondelimited.  A non-delimited region
-// will keep returning data until the underlying data source returns EOF.  A
-// delimited region will return EOF at a predetermined offset.
-//
-//                       end
-//                       ofs
-//                         |
-//                         V
-// +-----------------------+
-// |  delimited region     |   <-- hard EOF, even if data source has more data.
-// +-----------------------+
-//
-// +------------------------
-// | nondelimited region   Z   <-- won't return EOF until data source hits EOF.
-// +------------------------
-
-#ifndef UPB_BYTESTREAM_HPP
-#define UPB_BYTESTREAM_HPP
-
-#include "upb/bytestream.h"
-#include "upb/upb.hpp"
-#include <string>
-
-namespace upb {
-
-typedef upb_bytesuccess_t ByteSuccess;
-
-// Implement this interface to vend bytes to ByteRegions which will be used by
-// a decoder.
-class ByteSourceBase : public upb_bytesrc {
- public:
-  ByteSourceBase() { upb_bytesrc_init(this, vtable()); }
-  virtual ~ByteSourceBase() { upb_bytesrc_uninit(this); }
-
-  // Fetches at least one byte starting at ofs, setting *len to the actual
-  // number of bytes fetched (or 0 on EOF or error: see return value for
-  // details).  It is valid for bytes to be fetched multiple times, as long as
-  // the bytes have not been previously discarded.
-  virtual ByteSuccess Fetch(uint64_t ofs, size_t* len) = 0;
-
-  // Discards all data prior to ofs (except data that is pinned, if pinning
-  // support is added -- see TODO below).
-  virtual void Discard(uint64_t ofs) = 0;
-
-  // Copies "len" bytes of data from ofs to "dst", which must be at least "len"
-  // bytes long.  The given region must not be discarded.
-  virtual void Copy(uint64_t ofs, size_t len, char *dst) const = 0;
-
-  // Returns a pointer to the bytesrc's internal buffer, storing in *len how
-  // much data is available.  The given offset must not be discarded.  The
-  // returned buffer is valid for as long as its bytes are not discarded (in
-  // the case that part of the returned buffer is discarded, only the
-  // non-discarded bytes remain valid).
-  virtual const char *GetPtr(uint64_t ofs, size_t *len) const = 0;
-
-  // TODO: Add if/when there is a demonstrated need:
-  //
-  // // When the caller pins a region (which must not be already discarded), it
-  // // is guaranteed that the region will not be discarded (nor will the
-  // // bytesrc be destroyed) until the region is unpinned.  However, not all
-  // // bytesrc's support pinning; a false return indicates that a pin was not
-  // // possible.
-  // virtual bool Pin(uint64_t ofs, size_t len);
-  //
-  // // Releases some number of pinned bytes from the beginning of a pinned
-  // // region (which may be fewer than the total number of bytes pinned).
-  // virtual void Unpin(uint64_t ofs, size_t len, size_t bytes_to_release);
-  //
-  // Adding pinning support would also involve adding a "pin_ofs" parameter to
-  // upb_bytesrc_fetch, so that the fetch can extend an already-pinned region.
- private:
-  static upb_bytesrc_vtbl* vtable();
-  static upb_bytesuccess_t VFetch(void*, uint64_t, size_t*);
-  static void VDiscard(void*, uint64_t);
-  static void VCopy(const void*, uint64_t, size_t, char*);
-  static const char *VGetPtr(const void*, uint64_t, size_t*);
-};
-
-class ByteRegion : public upb_byteregion {
- public:
-  static const uint64_t kNondelimited = UPB_NONDELIMITED;
-
-  ByteRegion() { upb_byteregion_init(this); }
-  ~ByteRegion() { upb_byteregion_uninit(this); }
-
-  // Accessors for the regions bounds -- the meaning of these is described in
-  // the diagram above.
-  uint64_t start_ofs() const { return upb_byteregion_startofs(this); }
-  uint64_t discard_ofs() const { return upb_byteregion_discardofs(this); }
-  uint64_t fetch_ofs() const { return upb_byteregion_fetchofs(this); }
-  uint64_t end_ofs() const { return upb_byteregion_endofs(this); }
-
-  // Returns how many bytes are fetched and available for reading starting from
-  // offset "offset".
-  uint64_t BytesAvailable(uint64_t offset) const {
-    return upb_byteregion_available(this, offset);
-  }
-
-  // Returns the total number of bytes remaining after offset "offset", or
-  // kNondelimited if the byteregion is non-delimited.
-  uint64_t BytesRemaining(uint64_t offset) const {
-    return upb_byteregion_remaining(this, offset);
-  }
-
-  uint64_t Length() const { return upb_byteregion_len(this); }
-
-  // Sets the value of this byteregion to be a subset of the given byteregion's
-  // data.  The caller is responsible for releasing this region before the src
-  // region is released (unless the region is first pinned, if pinning support
-  // is added.  see below).
-  void Reset(const upb_byteregion *src, uint64_t ofs, uint64_t len) {
-    upb_byteregion_reset(this, src, ofs, len);
-  }
-  void Release() { upb_byteregion_release(this); }
-
-  // Attempts to fetch more data, extending the fetched range of this
-  // byteregion.  Returns true if the fetched region was extended by at least
-  // one byte, false on EOF or error (see *s for details).
-  ByteSuccess Fetch() { return upb_byteregion_fetch(this); }
-
-  // Fetches all remaining data, returning false if the operation failed (see
-  // *s for details).  May only be used on delimited byteregions.
-  ByteSuccess FetchAll() { return upb_byteregion_fetchall(this); }
-
-  // Discards bytes from the byteregion up until ofs (which must be greater or
-  // equal to discard_ofs()).  It is valid to discard bytes that have not been
-  // fetched (such bytes will never be fetched) but it is an error to discard
-  // past the end of a delimited byteregion.
-  void Discard(uint64_t ofs) { return upb_byteregion_discard(this, ofs); }
-
-  // Copies "len" bytes of data into "dst", starting at ofs.  The specified
-  // region must be available.
-  void Copy(uint64_t ofs, size_t len, char *dst) const {
-    upb_byteregion_copy(this, ofs, len, dst);
-  }
-
-  // Copies all bytes from the byteregion into dst.  Requires that the entire
-  // byteregion is fetched and that none has been discarded.
-  void CopyAll(char *dst) const {
-    upb_byteregion_copyall(this, dst);
-  }
-
-  // Returns a pointer to the internal buffer for the byteregion starting at
-  // offset "ofs." Stores the number of bytes available in this buffer in *len.
-  // The returned buffer is invalidated when the byteregion is reset or
-  // released, or when the bytes are discarded.  If the byteregion is not
-  // currently pinned, the pointer is only valid for the lifetime of the parent
-  // byteregion.
-  const char *GetPtr(uint64_t ofs, size_t *len) const {
-    return upb_byteregion_getptr(this, ofs, len);
-  }
-
-  // Copies the contents of the byteregion into a newly-allocated,
-  // NULL-terminated string.  Requires that the byteregion is fully fetched.
-  char *StrDup() const {
-    return upb_byteregion_strdup(this);
-  }
-
-  template <typename T> void AssignToString(T* str) {
-    uint64_t ofs = start_ofs();
-    size_t len;
-    const char *ptr = GetPtr(ofs, &len);
-    // Emperically calling reserve() here is counterproductive and slows down
-    // benchmarks.  If the parsing is happening in a tight loop that is reusing
-    // the string object, there is probably enough data reserved already and
-    // the reserve() call is extra overhead.
-    str->assign(ptr, len);
-    ofs += len;
-    while (ofs < end_ofs()) {
-      ptr = GetPtr(ofs, &len);
-      str->append(ptr, len);
-      ofs += len;
-    }
-  }
-
-  // TODO: add if/when there is a demonstrated need.
-  //
-  // // Pins this byteregion's bytes in memory, allowing it to outlive its
-  // // parent byteregion.  Normally a byteregion may only be used while its
-  // // parent is still valid, but a pinned byteregion may continue to be used
-  // // until it is reset or released.  A byteregion must be fully fetched to
-  // // be pinned (this implies that the byteregion must be delimited).
-  // //
-  // // In some cases this operation may cause the input data to be copied.
-  // //
-  // // void Pin();
-};
-
-class StringSource : public upb_stringsrc {
- public:
-  StringSource() : upb_stringsrc() { upb_stringsrc_init(this); }
-  template <typename T> explicit StringSource(const T& str) {
-    upb_stringsrc_init(this);
-    Reset(str);
-  }
-  StringSource(const char *data, size_t len) {
-    upb_stringsrc_init(this);
-    Reset(data, len);
-  }
-  ~StringSource() { upb_stringsrc_uninit(this); }
-
-  void Reset(const char* data, size_t len) {
-    upb_stringsrc_reset(this, data, len);
-  }
-
-  template <typename T> void Reset(const T& str) {
-    Reset(str.c_str(), str.size());
-  }
-
-  ByteRegion* AllBytes() {
-    return static_cast<ByteRegion*>(upb_stringsrc_allbytes(this));
-  }
-
-  upb_bytesrc* ByteSource() { return upb_stringsrc_bytesrc(this); }
-};
-
-template <> inline ByteRegion* GetValue<ByteRegion*>(Value v) {
-  return static_cast<ByteRegion*>(upb_value_getbyteregion(v));
-}
-
-template <> inline Value MakeValue<ByteRegion*>(ByteRegion* v) {
-  return upb_value_byteregion(v);
-}
-
-}  // namespace upb
-
-#endif
--- a/bindings/cpp/upb/def.hpp
+++ b/bindings/cpp/upb/def.hpp
@ -1,462 +0,0 @@
-//
-// upb - a minimalist implementation of protocol buffers.
-//
-// Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
-// Author: Josh Haberman <jhaberman@gmail.com>
-//
-// The set of upb::*Def classes and upb::SymbolTable allow for defining and
-// manipulating schema information (as defined in .proto files).
-//
-// Defs go through two distinct phases of life:
-//
-// 1. MUTABLE: when first created, the properties of the def can be set freely
-//    (for example a message's name, its list of fields, the name/number of
-//    fields, etc).  During this phase the def is *not* thread-safe, and may
-//    not be used for any purpose except to set its properties (it can't be
-//    used to parse anything, create any messages in memory, etc).
-//
-// 2. FINALIZED: the Def::Finzlie() operation finalizes a set of defs,
-//    which makes them thread-safe and immutable.  Finalized defs may only be
-//    accessed through a CONST POINTER.  If you want to modify an existing
-//    immutable def, copy it with Dup() and modify and finalize the copy.
-//
-// The refcounting of defs works properly no matter what state the def is in.
-// Once the def is finalized it is guaranteed that any def reachable from a
-// live def is also live (so a ref on the base of a message tree keeps the
-// whole tree alive).
-//
-// You can test for which stage of life a def is in by calling IsMutable().
-// This is particularly useful for dynamic language bindings, which must
-// properly guarantee that the dynamic language cannot break the rules laid out
-// above.
-//
-// It would be possible to make the defs thread-safe during stage 1 by using
-// mutexes internally and changing any methods returning pointers to return
-// copies instead.  This could be important if we are integrating with a VM or
-// interpreter that does not naturally serialize access to wrapped objects (for
-// example, in the case of Python this is not necessary because of the GIL).
-
-#ifndef UPB_DEF_HPP
-#define UPB_DEF_HPP
-
-#include <algorithm>
-#include <string>
-#include <vector>
-#include "upb/def.h"
-#include "upb/upb.hpp"
-
-namespace upb {
-
-class Def;
-class MessageDef;
-
-typedef upb_fieldtype_t FieldType;
-typedef upb_label_t Label;
-
-class FieldDef : public upb_fielddef {
- public:
-  static FieldDef* Cast(upb_fielddef *f) { return static_cast<FieldDef*>(f); }
-  static const FieldDef* Cast(const upb_fielddef *f) {
-    return static_cast<const FieldDef*>(f);
-  }
-
-  static FieldDef* New(const void *owner) {
-    return Cast(upb_fielddef_new(owner));
-  }
-  FieldDef* Dup(const void *owner) const {
-    return Cast(upb_fielddef_dup(this, owner));
-  }
-  void Ref(const void *owner) { upb_fielddef_ref(this, owner); }
-  void Unref(const void *owner) { upb_fielddef_unref(this, owner); }
-
-  bool IsMutable() const { return upb_fielddef_ismutable(this); }
-  bool IsFinalized() const { return upb_fielddef_isfinalized(this); }
-  bool IsString() const { return upb_isstring(this); }
-  bool IsSequence() const { return upb_isseq(this); }
-  bool IsSubmessage() const { return upb_issubmsg(this); }
-
-  // Simple accessors. /////////////////////////////////////////////////////////
-
-  FieldType type() const { return upb_fielddef_type(this); }
-  Label label() const { return upb_fielddef_label(this); }
-  int32_t number() const { return upb_fielddef_number(this); }
-  std::string name() const { return std::string(upb_fielddef_name(this)); }
-  Value default_() const { return upb_fielddef_default(this); }
-  Value bound_value() const { return upb_fielddef_fval(this); }
-  uint16_t offset() const { return upb_fielddef_offset(this); }
-  int16_t hasbit() const { return upb_fielddef_hasbit(this); }
-
-  bool set_type(FieldType type) { return upb_fielddef_settype(this, type); }
-  bool set_label(Label label) { return upb_fielddef_setlabel(this, label); }
-  void set_offset(uint16_t offset) { upb_fielddef_setoffset(this, offset); }
-  void set_hasbit(int16_t hasbit) { upb_fielddef_sethasbit(this, hasbit); }
-  void set_fval(Value fval) { upb_fielddef_setfval(this, fval); }
-  void set_accessor(struct _upb_accessor_vtbl* vtbl) {
-    upb_fielddef_setaccessor(this, vtbl);
-  }
-  MessageDef* message();
-  const MessageDef* message() const;
-
-  struct _upb_accessor_vtbl *accessor() const {
-    return upb_fielddef_accessor(this);
-  }
-
-  // "Number" and "name" must be set before the fielddef is added to a msgdef.
-  // For the moment we do not allow these to be set once the fielddef is added
-  // to a msgdef -- this could be relaxed in the future.
-  bool set_number(int32_t number) {
-    return upb_fielddef_setnumber(this, number);
-  }
-  bool set_name(const char *name) { return upb_fielddef_setname(this, name); }
-  bool set_name(const std::string& name) { return set_name(name.c_str()); }
-
-  // Default value. ////////////////////////////////////////////////////////////
-
-  // Returns the default value for this fielddef, which may either be something
-  // the client set explicitly or the "default default" (0 for numbers, empty
-  // for strings).  The field's type indicates the type of the returned value,
-  // except for enum fields that are still mutable.
-  //
-  // For enums the default can be set either numerically or symbolically -- the
-  // upb_fielddef_default_is_symbolic() function below will indicate which it
-  // is.  For string defaults, the value will be a upb_byteregion which is
-  // invalidated by any other non-const call on this object.  Once the fielddef
-  // is finalized, symbolic enum defaults are resolved, so finalized enum
-  // fielddefs always have a default of type int32.
-  Value defaultval() { return upb_fielddef_default(this); }
-
-  // Sets default value for the field.  For numeric types, use
-  // upb_fielddef_setdefault(), and "value" must match the type of the field.
-  // For string/bytes types, use upb_fielddef_setdefaultstr().  Enum types may
-  // use either, since the default may be set either numerically or
-  // symbolically.
-  //
-  // NOTE: May only be called for fields whose type has already been set.
-  // Also, will be reset to default if the field's type is set again.
-  void set_default(Value value) { upb_fielddef_setdefault(this, value); }
-  void set_default(const char *str) { upb_fielddef_setdefaultcstr(this, str); }
-  void set_default(const char *str, size_t len) {
-    upb_fielddef_setdefaultstr(this, str, len);
-  }
-  void set_default(const std::string& str) {
-    upb_fielddef_setdefaultstr(this, str.c_str(), str.size());
-  }
-
-  // The results of this function are only meaningful for mutable enum fields,
-  // which can have a default specified either as an integer or as a string.
-  // If this returns true, the default returned from upb_fielddef_default() is
-  // a string, otherwise it is an integer.
-  bool DefaultIsSymbolic() { return upb_fielddef_default_is_symbolic(this); }
-
-  // Subdef. ///////////////////////////////////////////////////////////////////
-
-  // Submessage and enum fields must reference a "subdef", which is the
-  // MessageDef or EnumDef that defines their type.  Note that when the
-  // FieldDef is mutable it may not have a subdef *yet*, but this still returns
-  // true to indicate that the field's type requires a subdef.
-  bool HasSubDef() { return upb_hassubdef(this); }
-
-  // Before a FieldDef is finalized, its subdef may be set either directly
-  // (with a Def*) or symbolically.  Symbolic refs must be resolved by the
-  // client before the containing msgdef can be finalized.
-  //
-  // Both methods require that HasSubDef() (so the type must be set prior to
-  // calling these methods).  Returns false if this is not the case, or if the
-  // given subdef is not of the correct type.  The subtype is reset if the
-  // field's type is changed.
-  bool set_subdef(Def* def);
-  bool set_subtype_name(const char *name) {
-    return upb_fielddef_setsubtypename(this, name);
-  }
-  bool set_subtype_name(const std::string& str) {
-    return set_subtype_name(str.c_str());
-  }
-
-  // Returns the enum or submessage def or symbolic name for this field, if
-  // any.  May only be called for fields where HasSubDef() is true.  Returns
-  // NULL if the subdef has not been set or if you ask for a subtype name when
-  // the subtype is currently set symbolically (or vice-versa).
-  //
-  // Caller does *not* own a ref on the returned def or string.
-  // subtypename_name() is non-const because only mutable defs can have the
-  // subtype name set symbolically (symbolic references must be resolved before
-  // the MessageDef can be finalized).
-  const Def* subdef() const;
-  const char *subtype_name() { return upb_fielddef_subtypename(this); }
-
- private:
-  UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(FieldDef);
-};
-
-class Def : public upb_def {
- public:
-  // Converting from C types to C++ wrapper types.
-  static Def* Cast(upb_def *def) { return static_cast<Def*>(def); }
-  static const Def* Cast(const upb_def *def) {
-    return static_cast<const Def*>(def);
-  }
-
-  void Ref(const void *owner) const { upb_def_ref(this, owner); }
-  void Unref(const void *owner) const { upb_def_unref(this, owner); }
-
-  void set_full_name(const char *name) { upb_def_setfullname(this, name); }
-  void set_full_name(const std::string& name) {
-    upb_def_setfullname(this, name.c_str());
-  }
-
-  const char *full_name() const { return upb_def_fullname(this); }
-
-  // Finalizes the given list of defs (as well as the fielddefs for the given
-  // msgdefs).  All defs reachable from any def in this list must either be
-  // already finalized or elsewhere in the list.  Any symbolic references to
-  // enums or submessages must already have been resolved.  Returns true on
-  // success, otherwise false is returned and status contains details.  In the
-  // error case the input defs are unmodified.  See the comment at the top of
-  // this file for the semantics of finalized defs.
-  //
-  // n is currently limited to 64k defs, if more are required break them into
-  // batches of 64k (or we could raise this limit, at the cost of a bigger
-  // upb_def structure or complexity in upb_def_finalize()).
-  static bool Finalize(Def*const* defs, int n, Status* status) {
-    return upb_finalize(reinterpret_cast<upb_def*const*>(defs), n, status);
-  }
-  static bool Finalize(const std::vector<Def*>& defs, Status* status) {
-    return Finalize(&defs[0], defs.size(), status);
-  }
-};
-
-class MessageDef : public upb_msgdef {
- public:
-  // Converting from C types to C++ wrapper types.
-  static MessageDef* Cast(upb_msgdef *md) {
-    return static_cast<MessageDef*>(md);
-  }
-  static const MessageDef* Cast(const upb_msgdef *md) {
-    return static_cast<const MessageDef*>(md);
-  }
-  static MessageDef* DynamicCast(Def* def) {
-    return Cast(upb_dyncast_msgdef(def));
-  }
-  static const MessageDef* DynamicCast(const Def* def) {
-    return Cast(upb_dyncast_msgdef_const(def));
-  }
-
-  Def* AsDef() { return Def::Cast(UPB_UPCAST(this)); }
-  const Def* AsDef() const { return Def::Cast(UPB_UPCAST(this)); }
-
-  static MessageDef* New(void *owner) { return Cast(upb_msgdef_new(owner)); }
-  MessageDef* Dup(void *owner) const {
-    return Cast(upb_msgdef_dup(this, owner));
-  }
-
-  void Ref(const void *owner) const { upb_msgdef_ref(this, owner); }
-  void Unref(const void *owner) const { upb_msgdef_unref(this, owner); }
-
-  // Read accessors -- may be called at any time.
-
-  const char *full_name() const { return AsDef()->full_name(); }
-
-  // The total size of in-memory messages created with this MessageDef.
-  uint16_t instance_size() const { return upb_msgdef_size(this); }
-
-  // The number of "hasbit" bytes in a message instance.
-  uint8_t hasbit_bytes() const { return upb_msgdef_hasbit_bytes(this); }
-
-  uint32_t extension_start() const { return upb_msgdef_extstart(this); }
-  uint32_t extension_end() const { return upb_msgdef_extend(this); }
-
-  // Write accessors.  May only be called before the msgdef is in a symtab.
-
-  void set_full_name(const char *name) { AsDef()->set_full_name(name); }
-  void set_full_name(const std::string& name) { AsDef()->set_full_name(name); }
-
-  void set_instance_size(uint16_t size) { upb_msgdef_setsize(this, size); }
-  void set_hasbit_bytes(uint16_t size) { upb_msgdef_setsize(this, size); }
-  bool SetExtensionRange(uint32_t start, uint32_t end) {
-    return upb_msgdef_setextrange(this, start, end);
-  }
-
-  // Adds a set of fields (FieldDef objects) to a MessageDef.  Caller passes a
-  // ref on the FieldDef to the MessageDef in both success and failure cases.
-  // May only be done before the MessageDef is in a SymbolTable (requires
-  // m->IsMutable() for the MessageDef).  The FieldDef's name and number must
-  // be set, and the message may not already contain any field with this name
-  // or number, and this FieldDef may not be part of another message, otherwise
-  // false is returned and the MessageDef is unchanged.
-  bool AddField(FieldDef* f, const void *owner) {
-    return AddFields(&f, 1, owner);
-  }
-  bool AddFields(FieldDef*const * f, int n, const void *owner) {
-    return upb_msgdef_addfields(this, (upb_fielddef*const*)f, n, owner);
-  }
-  bool AddFields(const std::vector<FieldDef*>& fields, const void *owner) {
-    return AddFields(&fields[0], fields.size(), owner);
-  }
-
-  int field_count() const { return upb_msgdef_numfields(this); }
-
-  // Lookup fields by name or number, returning NULL if no such field exists.
-  FieldDef* FindFieldByName(const char *name) {
-    return FieldDef::Cast(upb_msgdef_ntof(this, name));
-  }
-  FieldDef* FindFieldByName(const std::string& name) {
-    return FieldDef::Cast(upb_msgdef_ntof(this, name.c_str()));
-  }
-  FieldDef* FindFieldByNumber(uint32_t num) {
-    return FieldDef::Cast(upb_msgdef_itof(this, num));
-  }
-
-  const FieldDef* FindFieldByName(const char *name) const {
-    return FindFieldByName(name);
-  }
-  const FieldDef* FindFieldByName(const std::string& name) const {
-    return FindFieldByName(name);
-  }
-  const FieldDef* FindFieldByNumber(uint32_t num) const {
-    return FindFieldByNumber(num);
-  }
-
-  class Iterator : public upb_msg_iter {
-   public:
-    explicit Iterator(MessageDef* md) { upb_msg_begin(this, md); }
-    Iterator() {}
-
-    FieldDef* field() { return FieldDef::Cast(upb_msg_iter_field(this)); }
-    bool Done() { return upb_msg_done(this); }
-    void Next() { return upb_msg_next(this); }
-  };
-
-  class ConstIterator : public upb_msg_iter {
-   public:
-    explicit ConstIterator(const MessageDef* md) { upb_msg_begin(this, md); }
-    ConstIterator() {}
-
-    const FieldDef* field() { return FieldDef::Cast(upb_msg_iter_field(this)); }
-    bool Done() { return upb_msg_done(this); }
-    void Next() { return upb_msg_next(this); }
-  };
-
- private:
-  UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(MessageDef);
-};
-
-class EnumDef : public upb_enumdef {
- public:
-  // Converting from C types to C++ wrapper types.
-  static EnumDef* Cast(upb_enumdef *e) { return static_cast<EnumDef*>(e); }
-  static const EnumDef* Cast(const upb_enumdef *e) {
-    return static_cast<const EnumDef*>(e);
-  }
-
-  static EnumDef* New(const void *owner) { return Cast(upb_enumdef_new(owner)); }
-
-  void Ref(const void *owner) { upb_enumdef_ref(this, owner); }
-  void Unref(const void *owner) { upb_enumdef_unref(this, owner); }
-  EnumDef* Dup(const void *owner) const {
-    return Cast(upb_enumdef_dup(this, owner));
-  }
-
-  Def* AsDef() { return Def::Cast(UPB_UPCAST(this)); }
-  const Def* AsDef() const { return Def::Cast(UPB_UPCAST(this)); }
-
-  int32_t default_value() const { return upb_enumdef_default(this); }
-
-  // May only be set if IsMutable().
-  void set_full_name(const char *name) { AsDef()->set_full_name(name); }
-  void set_full_name(const std::string& name) { AsDef()->set_full_name(name); }
-  void set_default_value(int32_t val) {
-    return upb_enumdef_setdefault(this, val);
-  }
-
-  // Adds a value to the enumdef.  Requires that no existing val has this
-  // name or number (returns false and does not add if there is).  May only
-  // be called if IsMutable().
-  bool AddValue(char *name, int32_t num) {
-    return upb_enumdef_addval(this, name, num);
-  }
-  bool AddValue(const std::string& name, int32_t num) {
-    return upb_enumdef_addval(this, name.c_str(), num);
-  }
-
-  // Lookups from name to integer and vice-versa.
-  bool LookupName(const char *name, int32_t* num) const {
-    return upb_enumdef_ntoi(this, name, num);
-  }
-
-  // Lookup from integer to name, returns a NULL-terminated string which
-  // the caller does not own, or NULL if not found.
-  const char *LookupNumber(int32_t num) const {
-    return upb_enumdef_iton(this, num);
-  }
-
- private:
-  UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(EnumDef);
-};
-
-class SymbolTable : public upb_symtab {
- public:
-  // Converting from C types to C++ wrapper types.
-  static SymbolTable* Cast(upb_symtab *s) {
-    return static_cast<SymbolTable*>(s);
-  }
-  static const SymbolTable* Cast(const upb_symtab *s) {
-    return static_cast<const SymbolTable*>(s);
-  }
-
-  static SymbolTable* New(const void *owner) {
-    return Cast(upb_symtab_new(owner));
-  }
-
-  void Ref(const void *owner) const { upb_symtab_unref(this, owner); }
-  void Unref(const void *owner) const { upb_symtab_unref(this, owner); }
-  void DonateRef(const void *from, const void *to) const {
-    upb_symtab_donateref(this, from, to);
-  }
-
-  // Adds the given defs to the symtab, resolving all symbols.  Only one def
-  // per name may be in the list, but defs can replace existing defs in the
-  // symtab.  The entire operation either succeeds or fails.  If the operation
-  // fails, the symtab is unchanged, false is returned, and status indicates
-  // the error.  The caller passes a ref on the defs in all cases.
-  bool Add(Def *const *defs, int n, void *owner, Status* status) {
-    return upb_symtab_add(this, (upb_def*const*)defs, n, owner, status);
-  }
-  bool Add(const std::vector<Def*>& defs, void *owner, Status* status) {
-    return Add(&defs[0], defs.size(), owner, status);
-  }
-
-  // If the given name refers to a message in this symbol table, returns a new
-  // ref to that MessageDef object, otherwise returns NULL.
-  const MessageDef* LookupMessage(const char *name, void *owner) const {
-    return MessageDef::Cast(upb_symtab_lookupmsg(this, name, owner));
-  }
-
- private:
-  UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(SymbolTable);
-};
-
-template <> inline const FieldDef* GetValue<const FieldDef*>(Value v) {
-  return static_cast<const FieldDef*>(upb_value_getfielddef(v));
-}
-
-template <> inline Value MakeValue<FieldDef*>(FieldDef* v) {
-  return upb_value_fielddef(v);
-}
-
-inline MessageDef* FieldDef::message() {
-  return MessageDef::Cast(upb_fielddef_msgdef(this));
-}
-inline const MessageDef* FieldDef::message() const {
-  return MessageDef::Cast(upb_fielddef_msgdef(this));
-}
-
-inline const Def* FieldDef::subdef() const {
-  return Def::Cast(upb_fielddef_subdef(this));
-}
-inline bool FieldDef::set_subdef(Def* def) {
-  return upb_fielddef_setsubdef(this, def);
-}
-
-}  // namespace upb
-
-#endif
--- a/bindings/cpp/upb/handlers.cc
+++ b/bindings/cpp/upb/handlers.cc
@ -1,39 +0,0 @@
-//
-// upb - a minimalist implementation of protocol buffers.
-//
-// Copyright (c) 2011 Google Inc.  See LICENSE for details.
-// Author: Josh Haberman <jhaberman@gmail.com>
-
-#include "handlers.hpp"
-
-#include "def.hpp"
-
-namespace upb {
-
-namespace {
-
-void MessageCallbackWrapper(
-    void* closure, upb_mhandlers* mh, const upb_msgdef* m) {
-  Handlers::MessageRegistrationVisitor* visitor =
-      static_cast<Handlers::MessageRegistrationVisitor*>(closure);
-  visitor->OnMessage(static_cast<MessageHandlers*>(mh),
-                     static_cast<const MessageDef*>(m));
-}
-
-void FieldCallbackWrapper(
-    void* closure, upb_fhandlers* fh, const upb_fielddef* f) {
-  Handlers::MessageRegistrationVisitor* visitor =
-      static_cast<Handlers::MessageRegistrationVisitor*>(closure);
-  visitor->OnField(static_cast<FieldHandlers*>(fh),
-                   static_cast<const FieldDef*>(f));
-}
-}  // namepace
-
-MessageHandlers* Handlers::RegisterMessageDef(
-    const MessageDef& m, Handlers::MessageRegistrationVisitor* visitor) {
-  upb_mhandlers* mh = upb_handlers_regmsgdef(
-      this, &m, &MessageCallbackWrapper, &FieldCallbackWrapper, &visitor);
-  return static_cast<MessageHandlers*>(mh);
-}
-
-}  // namespace upb
--- a/bindings/cpp/upb/handlers.hpp
+++ b/bindings/cpp/upb/handlers.hpp
@ -1,176 +0,0 @@
-//
-// upb - a minimalist implementation of protocol buffers.
-//
-// Copyright (c) 2011 Google Inc.  See LICENSE for details.
-// Author: Josh Haberman <jhaberman@gmail.com>
-//
-// upb::Handlers is a generic visitor-like interface for iterating over a
-// stream of protobuf data.  You can register function pointers that will be
-// called for each message and/or field as the data is being parsed or iterated
-// over, without having to know the source format that we are parsing from.
-// This decouples the parsing logic from the processing logic.
-
-#ifndef UPB_HANDLERS_HPP
-#define UPB_HANDLERS_HPP
-
-#include "upb/handlers.h"
-
-#include "upb/upb.hpp"
-
-namespace upb {
-
-typedef upb_fieldtype_t FieldType;
-typedef upb_flow_t Flow;
-typedef upb_sflow_t SubFlow;
-class MessageHandlers;
-class MessageDef;
-class FieldDef;
-
-class FieldHandlers : public upb_fhandlers {
- public:
-  typedef upb_value_handler ValueHandler;
-  typedef upb_startfield_handler StartFieldHandler;
-  typedef upb_endfield_handler EndFieldHandler;
-
-  // The FieldHandlers will live at least as long as the upb::Handlers to
-  // which it belongs, but can be Ref'd/Unref'd to make it live longer (which
-  // will prolong the life of the underlying upb::Handlers also).
-  void Ref()   { upb_fhandlers_ref(this); }
-  void Unref() { upb_fhandlers_unref(this); }
-
-  // Functions to set this field's handlers.
-  // These return "this" so they can be conveniently chained, eg.
-  //   message_handlers->NewField(...)
-  //       ->SetStartSequenceHandler(&StartSequence),
-  //       ->SetEndSequenceHandler(&EndSequence),
-  //       ->SetValueHandler(&Value);
-  FieldHandlers* SetValueHandler(ValueHandler* h) {
-    upb_fhandlers_setvalue(this, h); return this;
-  }
-  FieldHandlers* SetStartSequenceHandler(StartFieldHandler* h) {
-    upb_fhandlers_setstartseq(this, h); return this;
-  }
-  FieldHandlers* SetEndSequenceHandler(EndFieldHandler* h) {
-    upb_fhandlers_setendseq(this, h); return this;
-  }
-  FieldHandlers* SetStartSubmessageHandler(StartFieldHandler* h) {
-    upb_fhandlers_setstartsubmsg(this, h); return this;
-  }
-  FieldHandlers* SetEndSubmessageHandler(EndFieldHandler* h) {
-    upb_fhandlers_setendsubmsg(this, h); return this;
-  }
-
-  // Get/Set the field's bound value, which will be passed to its handlers.
-  Value GetBoundValue() const { return upb_fhandlers_getfval(this); }
-  FieldHandlers* SetBoundValue(Value val) {
-    upb_fhandlers_setfval(this, val); return this;
-  }
-
-  // Returns the MessageHandlers to which we belong.
-  MessageHandlers* GetMessageHandlers() const;
-  // Returns the MessageHandlers for this field's submessage (invalid to call
-  // unless this field's type UPB_TYPE(MESSAGE) or UPB_TYPE(GROUP).
-  MessageHandlers* GetSubMessageHandlers() const;
-  // If set to >=0, the given hasbit will be set after the value callback is
-  // called (offset relative to the current closure).
-  int32_t GetHasbit() const { return upb_fhandlers_gethasbit(this); }
-  void SetHasbit(int32_t bit) { upb_fhandlers_sethasbit(this, bit); }
-
- private:
-  UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(FieldHandlers);
-};
-
-class MessageHandlers : public upb_mhandlers {
- public:
-  typedef upb_startmsg_handler StartMessageHandler;
-  typedef upb_endmsg_handler EndMessageHandler;
-
-  static MessageHandlers* Cast(upb_mhandlers* mh) {
-    return static_cast<MessageHandlers*>(mh);
-  }
-  static const MessageHandlers* Cast(const upb_mhandlers* mh) {
-    return static_cast<const MessageHandlers*>(mh);
-  }
-
-  // The MessageHandlers will live at least as long as the upb::Handlers to
-  // which it belongs, but can be Ref'd/Unref'd to make it live longer (which
-  // will prolong the life of the underlying upb::Handlers also).
-  void Ref()    { upb_mhandlers_ref(this); }
-  void Unref()  { upb_mhandlers_unref(this); }
-
-  // Functions to set this message's handlers.
-  // These return "this" so they can be conveniently chained, eg.
-  //   handlers->NewMessageHandlers()
-  //       ->SetStartMessageHandler(&StartMessage)
-  //       ->SetEndMessageHandler(&EndMessage);
-  MessageHandlers* SetStartMessageHandler(StartMessageHandler* h) {
-    upb_mhandlers_setstartmsg(this, h); return this;
-  }
-  MessageHandlers* SetEndMessageHandler(EndMessageHandler* h) {
-    upb_mhandlers_setendmsg(this, h); return this;
-  }
-
-  // Functions to create new FieldHandlers for this message.
-  FieldHandlers* NewFieldHandlers(uint32_t fieldnum, FieldType type,
-                                  bool repeated) {
-    return static_cast<FieldHandlers*>(
-        upb_mhandlers_newfhandlers(this, fieldnum, type, repeated));
-  }
-
-  // Like the previous but for MESSAGE or GROUP fields.  For GROUP fields, the
-  // given submessage must not have any fields with this field number.
-  FieldHandlers* NewFieldHandlersForSubmessage(uint32_t n, const char *name,
-                                               FieldType type, bool repeated,
-                                               MessageHandlers* subm) {
-    (void)name;
-    return static_cast<FieldHandlers*>(
-        upb_mhandlers_newfhandlers_subm(this, n, type, repeated, subm));
-  }
-
- private:
-  UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(MessageHandlers);
-};
-
-class Handlers : public upb_handlers {
- public:
-  // Creates a new Handlers instance.
-  static Handlers* New() { return static_cast<Handlers*>(upb_handlers_new()); }
-
-  void Ref()   { upb_handlers_ref(this); }
-  void Unref() { upb_handlers_unref(this); }
-
-  // Returns a new MessageHandlers object.  The first such message that is
-  // obtained will be the top-level message for this Handlers object.
-  MessageHandlers* NewMessageHandlers() {
-    return static_cast<MessageHandlers*>(upb_handlers_newmhandlers(this));
-  }
-
-  // Convenience function for registering handlers for all messages and fields
-  // in a MessageDef and all its children.  For every registered message,
-  // OnMessage will be called on the visitor with newly-created MessageHandlers
-  // and MessageDef. Likewise with OnField will be called with newly-created
-  // FieldHandlers and FieldDef for each field.
-  class MessageRegistrationVisitor {
-   public:
-    virtual ~MessageRegistrationVisitor() {}
-    virtual void OnMessage(MessageHandlers* mh, const MessageDef* m) = 0;
-    virtual void OnField(FieldHandlers* fh, const FieldDef* f) = 0;
-  };
-  MessageHandlers* RegisterMessageDef(const MessageDef& m,
-                                      MessageRegistrationVisitor* visitor);
-
- private:
-  UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(Handlers);
-};
-
-inline MessageHandlers* FieldHandlers::GetMessageHandlers() const {
-  return static_cast<MessageHandlers*>(upb_fhandlers_getmsg(this));
-}
-
-inline MessageHandlers* FieldHandlers::GetSubMessageHandlers() const {
-  return static_cast<MessageHandlers*>(upb_fhandlers_getsubmsg(this));
-}
-
-}  // namespace upb
-
-#endif
--- a/bindings/cpp/upb/msg.hpp
+++ b/bindings/cpp/upb/msg.hpp
@ -1,62 +0,0 @@
-//
-// upb - a minimalist implementation of protocol buffers.
-//
-// Copyright (c) 2011 Google Inc.  See LICENSE for details.
-// Author: Josh Haberman <jhaberman@gmail.com>
-// Routines for reading and writing message data to an in-memory structure,
-// similar to a C struct.
-//
-// upb does not define one single message object that everyone must use.
-// Rather it defines an abstract interface for reading and writing members
-// of a message object, and all of the parsers and serializers use this
-// abstract interface.  This allows upb's parsers and serializers to be used
-// regardless of what memory management scheme or synchronization model the
-// application is using.
-//
-// A standard set of accessors is provided for doing simple reads and writes at
-// a known offset into the message.  These accessors should be used when
-// possible, because they are specially optimized -- for example, the JIT can
-// recognize them and emit specialized code instead of having to call the
-// function at all.  The application can substitute its own accessors when the
-// standard accessors are not suitable.
-
-#ifndef UPB_MSG_HPP
-#define UPB_MSG_HPP
-
-#include "upb/msg.h"
-#include "upb/handlers.hpp"
-
-namespace upb {
-
-typedef upb_accessor_vtbl AccessorVTable;
-
-// Registers handlers for writing into a message of the given type using
-// whatever accessors it has defined.
-inline MessageHandlers* RegisterWriteHandlers(upb::Handlers* handlers,
-                                              const upb::MessageDef* md) {
-  return MessageHandlers::Cast(
-      upb_accessors_reghandlers(handlers, md));
-}
-
-template <typename T> static FieldHandlers::ValueHandler* GetValueHandler();
-
-// A handy templated function that will retrieve a value handler for a given
-// C++ type.
-#define GET_VALUE_HANDLER(type, ctype) \
-    template <> \
-    inline FieldHandlers::ValueHandler* GetValueHandler<ctype>() { \
-      return &upb_stdmsg_set ## type; \
-    }
-
-GET_VALUE_HANDLER(double, double);
-GET_VALUE_HANDLER(float, float);
-GET_VALUE_HANDLER(uint64, uint64_t);
-GET_VALUE_HANDLER(uint32, uint32_t);
-GET_VALUE_HANDLER(int64, int64_t);
-GET_VALUE_HANDLER(int32, int32_t);
-GET_VALUE_HANDLER(bool, bool);
-#undef GET_VALUE_HANDLER
-
-}  // namespace
-
-#endif
--- a/bindings/cpp/upb/pb/decoder.hpp
+++ b/bindings/cpp/upb/pb/decoder.hpp
@ -22,14 +22,14 @@

 #include "upb/pb/decoder.h"

-#include "upb/bytestream.hpp"
-#include "upb/upb.hpp"
+#include "upb/bytestream.h"
+#include "upb/upb.h"

 namespace upb {

 class DecoderPlan : public upb_decoderplan {
 public:
-  static DecoderPlan* New(Handlers* h, bool allow_jit) {
+  static DecoderPlan* New(const Handlers* h, bool allow_jit) {
    return static_cast<DecoderPlan*>(upb_decoderplan_new(h, allow_jit));
  }
  void Unref() { upb_decoderplan_unref(this); }
@ -54,9 +54,7 @@ class Decoder : public upb_decoder {
  // reset to a different plan.
  //
  // Must be called before ResetInput() or Decode().
-  void ResetPlan(DecoderPlan* plan, int32_t msg_offset) {
-    upb_decoder_resetplan(this, plan, msg_offset);
-  }
+  void ResetPlan(DecoderPlan* plan) { upb_decoder_resetplan(this, plan); }

  // Resets the input of the decoder.  This puts it in a state where it has not
  // seen any data, and expects the next data to be from the beginning of a new
@ -71,7 +69,7 @@ class Decoder : public upb_decoder {

  // Decodes serialized data (calling Handlers as the data is parsed) until
  // error or EOF (see status() for details).
-  Success Decode() { return upb_decoder_decode(this); }
+  Status::Success Decode() { return upb_decoder_decode(this); }

  const upb::Status& status() {
    return static_cast<const upb::Status&>(*upb_decoder_status(this));
--- a/bindings/cpp/upb/pb/glue.hpp
+++ b/bindings/cpp/upb/pb/glue.hpp
@ -1,35 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2011 Google Inc.  See LICENSE for details.
- * Author: Josh Haberman <jhaberman@gmail.com>
- */
-
-#ifndef UPB_PB_GLUE_HPP
-#define UPB_PB_GLUE_HPP
-
-#include "upb/upb.hpp"
-#include "upb/pb/glue.h"
-
-namespace upb {
-
-// All routines that load descriptors expect the descriptor to be a
-// FileDescriptorSet.
-bool LoadDescriptorFileIntoSymtab(SymbolTable* s, const char *fname,
-                                  Status* status) {
-  return upb_load_descriptor_file_into_symtab(s, fname, status);
-}
-
-bool LoadDescriptorIntoSymtab(SymbolTable* s, const char* str,
-                              size_t len, Status* status) {
-  return upb_load_descriptor_into_symtab(s, str, len, status);
-}
-
-template <typename T>
-bool LoadDescriptorIntoSymtab(SymbolTable* s, const T& desc, Status* status) {
-  return upb_load_descriptor_into_symtab(s, desc.c_str(), desc.size(), status);
-}
-
-}  // namespace upb
-
-#endif
--- a/bindings/cpp/upb/proto2_bridge.cc
+++ b/bindings/cpp/upb/proto2_bridge.cc
@ -1,892 +0,0 @@
-//
-// upb - a minimalist implementation of protocol buffers.
-//
-// Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
-// Author: Josh Haberman <jhaberman@gmail.com>
-
-#include <string>
-#include <typeinfo>
-#include "upb/bytestream.hpp"
-#include "upb/def.hpp"
-#include "upb/handlers.hpp"
-#include "upb/msg.hpp"
-#include "upb/proto2_bridge.hpp"
-
-namespace {
-
-static void* GetFieldPointer(void *message, const upb::FieldDef* f) {
-  return static_cast<char*>(message) + f->offset();
-}
-
-}  // namespace
-
-#ifdef UPB_GOOGLE3
-
-// TODO(haberman): friend upb so that this isn't required.
-#define protected public
-#include "net/proto2/public/repeated_field.h"
-#undef private
-
-#define private public
-#include "net/proto/proto2_reflection.h"
-#undef private
-
-#include "net/proto2/proto/descriptor.pb.h"
-#include "net/proto2/public/descriptor.h"
-#include "net/proto2/public/generated_message_reflection.h"
-#include "net/proto2/public/lazy_field.h"
-#include "net/proto2/public/message.h"
-#include "net/proto2/public/string_piece_field_support.h"
-#include "net/proto/internal_layout.h"
-#include "strings/cord.h"
-using ::proto2::Descriptor;
-using ::proto2::EnumDescriptor;
-using ::proto2::EnumValueDescriptor;
-using ::proto2::FieldDescriptor;
-using ::proto2::FieldOptions;
-using ::proto2::FileDescriptor;
-using ::proto2::internal::GeneratedMessageReflection;
-using ::proto2::internal::RepeatedPtrFieldBase;
-using ::proto2::internal::StringPieceField;
-using ::proto2::Message;
-using ::proto2::MessageFactory;
-using ::proto2::Reflection;
-using ::proto2::RepeatedField;
-using ::proto2::RepeatedPtrField;
-
-namespace upb {
-
-static const Message* GetPrototypeForField(const Message& m,
-                                           const FieldDescriptor* f);
-
-namespace proto2_bridge_google3 { class FieldAccessor; }
-
-using ::upb::proto2_bridge_google3::FieldAccessor;
-
-namespace proto2_bridge_google3 {
-
-static void AssignToCord(const ByteRegion* r, Cord* cord) {
-  // TODO(haberman): ref source data if source is a cord.
-  cord->Clear();
-  uint64_t ofs = r->start_ofs();
-  while (ofs < r->end_ofs()) {
-    size_t len;
-    const char *buf = r->GetPtr(ofs, &len);
-    cord->Append(StringPiece(buf, len));
-    ofs += len;
-  }
-}
-
-#else
-
-// TODO(haberman): friend upb so that this isn't required.
-#define protected public
-#include "google/protobuf/repeated_field.h"
-#undef protected
-
-#define private public
-#include "google/protobuf/generated_message_reflection.h"
-#undef private
-
-#include "google/protobuf/descriptor.h"
-#include "google/protobuf/descriptor.pb.h"
-#include "google/protobuf/message.h"
-using ::google::protobuf::Descriptor;
-using ::google::protobuf::EnumDescriptor;
-using ::google::protobuf::EnumValueDescriptor;
-using ::google::protobuf::FieldDescriptor;
-using ::google::protobuf::FieldOptions;
-using ::google::protobuf::FileDescriptor;
-using ::google::protobuf::internal::GeneratedMessageReflection;
-using ::google::protobuf::internal::RepeatedPtrFieldBase;
-using ::google::protobuf::Message;
-using ::google::protobuf::MessageFactory;
-using ::google::protobuf::Reflection;
-using ::google::protobuf::RepeatedField;
-using ::google::protobuf::RepeatedPtrField;
-
-namespace upb {
-static const Message* GetPrototypeForField(const Message& m,
-                                           const FieldDescriptor* f);
-
-namespace proto2_bridge_opensource { class FieldAccessor; }
-
-using ::upb::proto2_bridge_opensource::FieldAccessor;
-
-namespace proto2_bridge_opensource {
-
-#endif  // ifdef UPB_GOOGLE3
-
-// Have to define this manually since older versions of proto2 didn't define
-// an enum value for STRING.
-#define UPB_CTYPE_STRING 0
-
-// The code in this class depends on the internal representation of the proto2
-// generated classes, which is an internal implementation detail of proto2 and
-// is not a public interface.  As a result, this class's implementation may
-// need to be changed if/when proto2 changes its internal representation.  It
-// is intended that this class is the only code that depends on these internal,
-// non-public interfaces.
-//
-// This class only works with messages that use GeneratedMessageReflection.
-// Other reflection classes will need other accessor implementations.
-class FieldAccessor {
- public:
-  // Returns true if we were able to set an accessor and any other properties
-  // of the FieldDef that are necessary to read/write this field to a
-  // proto2::Message.
-  static bool TrySet(const FieldDescriptor* proto2_f,
-                     const upb::MessageDef* md,
-                     upb::FieldDef* upb_f) {
-    const Message* prototype = static_cast<const Message*>(md->prototype);
-    const Reflection* base_r = prototype->GetReflection();
-    const GeneratedMessageReflection* r =
-        dynamic_cast<const GeneratedMessageReflection*>(base_r);
-    // Old versions of the open-source protobuf release erroneously default to
-    // Cord even though that has never been supported in the open-source
-    // release.
-    int32_t ctype = proto2_f->options().has_ctype() ?
-        proto2_f->options().ctype() : UPB_CTYPE_STRING;
-    if (!r) return false;
-    // Extensions not supported yet.
-    if (proto2_f->is_extension()) return false;
-
-    upb_f->set_accessor(GetForFieldDescriptor(proto2_f, ctype));
-    upb_f->set_hasbit(GetHasbit(proto2_f, r));
-    upb_f->set_offset(GetOffset(proto2_f, r));
-    if (upb_f->IsSubmessage()) {
-      upb_f->set_subtype_name(proto2_f->message_type()->full_name());
-      upb_f->prototype = GetPrototypeForField(*prototype, proto2_f);
-    }
-
-    if (upb_f->IsString() && !upb_f->IsSequence() &&
-        ctype == UPB_CTYPE_STRING) {
-      upb_f->prototype = &r->GetStringReference(*prototype, proto2_f, NULL);
-    }
-    return true;
-  }
-
-  static MessageFactory* GetMessageFactory(const Message& m) {
-    const GeneratedMessageReflection* r =
-        dynamic_cast<const GeneratedMessageReflection*>(m.GetReflection());
-    return r ? r->message_factory_ : NULL;
-  }
-
- private:
-  static int64_t GetHasbit(const FieldDescriptor* f,
-                           const GeneratedMessageReflection* r) {
-    if (f->is_repeated()) {
-      // proto2 does not store hasbits for repeated fields.
-      return -1;
-    } else {
-      return (r->has_bits_offset_ * 8) + f->index();
-    }
-  }
-
-  static uint16_t GetOffset(const FieldDescriptor* f,
-                            const GeneratedMessageReflection* r) {
-    return r->offsets_[f->index()];
-  }
-
-  static AccessorVTable *GetForFieldDescriptor(const FieldDescriptor* f,
-                                               int32_t ctype) {
-    switch (f->cpp_type()) {
-      case FieldDescriptor::CPPTYPE_ENUM:
-        // Should handlers validate enum membership to match proto2?
-      case FieldDescriptor::CPPTYPE_INT32: return Get<int32_t>();
-      case FieldDescriptor::CPPTYPE_INT64: return Get<int64_t>();
-      case FieldDescriptor::CPPTYPE_UINT32: return Get<uint32_t>();
-      case FieldDescriptor::CPPTYPE_UINT64: return Get<uint64_t>();
-      case FieldDescriptor::CPPTYPE_DOUBLE: return Get<double>();
-      case FieldDescriptor::CPPTYPE_FLOAT: return Get<float>();
-      case FieldDescriptor::CPPTYPE_BOOL: return Get<bool>();
-      case FieldDescriptor::CPPTYPE_STRING:
-        switch (ctype) {
-#ifdef UPB_GOOGLE3
-          case FieldOptions::STRING:
-            return GetForString<string>();
-          case FieldOptions::CORD:
-            return GetForCord();
-          case FieldOptions::STRING_PIECE:
-            return GetForStringPiece();
-#else
-          case UPB_CTYPE_STRING:
-            return GetForString<std::string>();
-#endif
-          default: return NULL;
-        }
-      case FieldDescriptor::CPPTYPE_MESSAGE:
-#ifdef UPB_GOOGLE3
-        if (f->options().lazy()) {
-          return NULL;  // Not yet implemented.
-        } else {
-          return GetForMessage();
-        }
-#else
-        return GetForMessage();
-#endif
-      default: return NULL;
-    }
-  }
-
-  // PushOffset handler (used for StartSequence and others)  ///////////////////
-
-  static SubFlow PushOffset(void *m, Value fval) {
-    const FieldDef *f = GetValue<const FieldDef*>(fval);
-    return UPB_CONTINUE_WITH(GetFieldPointer(m, f));
-  }
-
-  // Primitive Value (numeric, enum, bool) /////////////////////////////////////
-
-  template <typename T> static AccessorVTable *Get() {
-    static upb_accessor_vtbl vtbl = {
-      NULL,  // StartSubMessage handler
-      GetValueHandler<T>(),
-      &PushOffset,  // StartSequence handler
-      NULL,  // StartRepeatedSubMessage handler
-      &Append<T>,
-      NULL, NULL, NULL, NULL, NULL, NULL};
-    return &vtbl;
-  }
-
-  template <typename T>
-  static Flow Append(void *_r, Value fval, Value val) {
-    (void)fval;
-    RepeatedField<T>* r = static_cast<RepeatedField<T>*>(_r);
-    r->Add(GetValue<T>(val));
-    return UPB_CONTINUE;
-  }
-
-  // String ////////////////////////////////////////////////////////////////////
-
-  // Returns the accessor vtable for string fields stored as type T.  The
-  // table is a function-local static, so each T gets its own instance.
-  template <typename T> static AccessorVTable *GetForString() {
-    static upb_accessor_vtbl string_vtbl = {
-      NULL,              // StartSubMessage handler
-      &SetString<T>,     // Value handler
-      &PushOffset,       // StartSequence handler
-      NULL,              // StartRepeatedSubMessage handler
-      &AppendString<T>,  // Repeated value handler
-      NULL, NULL, NULL, NULL, NULL, NULL
-    };
-    return &string_vtbl;
-  }
-
-  // This needs to be templated because google3 string is not std::string.
-  template <typename T> static Flow SetString(void *m, Value fval, Value val) {
-    const FieldDef* field = GetValue<const FieldDef*>(fval);
-    // The message stores a pointer to the string; work on that slot directly.
-    T*& slot = *static_cast<T**>(GetFieldPointer(m, field));
-    if (slot == field->prototype) {
-      // Still pointing at the default instance: allocate a string of our own
-      // before writing to it.
-      slot = new T();
-    }
-    ByteRegion* bytes = GetValue<ByteRegion*>(val);
-    bytes->AssignToString(slot);
-    return UPB_CONTINUE;
-  }
-
-  // Repeated-value handler for strings: treats the closure as a
-  // RepeatedPtrField<T>, adds one element and fills it from the byte region.
-  template <typename T>
-  static Flow AppendString(void *_r, Value fval, Value val) {
-    (void)fval;  // Not needed: the closure is already the destination.
-    RepeatedPtrField<T>* strings = static_cast<RepeatedPtrField<T>*>(_r);
-    T* appended = strings->Add();
-    GetValue<ByteRegion*>(val)->AssignToString(appended);
-    return UPB_CONTINUE;
-  }
-
-  // SubMessage ////////////////////////////////////////////////////////////////
-
-  // Returns the accessor vtable for submessage fields.  Only the "start"
-  // handlers are populated; both value-handler slots stay NULL.
-  static AccessorVTable *GetForMessage() {
-    static upb_accessor_vtbl message_vtbl = {
-      &StartSubMessage,          // StartSubMessage handler
-      NULL,                      // Value handler
-      &PushOffset,               // StartSequence handler
-      &StartRepeatedSubMessage,  // StartRepeatedSubMessage handler
-      NULL,                      // Repeated value handler
-      NULL, NULL, NULL, NULL, NULL, NULL
-    };
-    return &message_vtbl;
-  }
-
-  static SubFlow StartSubMessage(void *m, Value fval) {
-    const FieldDef* field = GetValue<const FieldDef*>(fval);
-    void*& slot = *static_cast<void**>(GetFieldPointer(m, field));
-    // An unset slot is either NULL or still points at the prototype; in both
-    // cases a fresh submessage has to be created from the prototype.
-    const bool needs_instance = (slot == NULL) || (slot == field->prototype);
-    if (needs_instance) {
-      slot = static_cast<const Message*>(field->prototype)->New();
-    }
-    return UPB_CONTINUE_WITH(slot);
-  }
-
-  // Type handler passed to RepeatedPtrFieldBase's AddFromCleared() and
-  // AddAllocated() so that elements can be handled as untyped (void) objects.
-  class RepeatedMessageTypeHandler {
-   public:
-    typedef void Type;
-
-    // AddAllocated() only calls this when cleared objects are waiting to be
-    // reused, which this code never arranges; reaching it is a bug.
-    static void Delete(Type* unused) {
-      assert(false);
-      (void)unused;
-    }
-  };
-
-  // Closure is a RepeatedPtrField<SubMessageType>*, but we access it through
-  // its base class RepeatedPtrFieldBase*.
-  static SubFlow StartRepeatedSubMessage(void* _r, Value fval) {
-    const FieldDef* field = GetValue<const FieldDef*>(fval);
-    RepeatedPtrFieldBase *repeated = static_cast<RepeatedPtrFieldBase*>(_r);
-    // Prefer an element previously cleared by the container, if one exists.
-    void *element = repeated->AddFromCleared<RepeatedMessageTypeHandler>();
-    if (element == NULL) {
-      // Nothing to reuse: build a new submessage from the field's prototype
-      // and pass it to the container.
-      element = static_cast<const Message*>(field->prototype)->New();
-      repeated->AddAllocated<RepeatedMessageTypeHandler>(element);
-    }
-    return UPB_CONTINUE_WITH(element);
-  }
-
-  // TODO(haberman): handle Extensions, Unknown Fields.
-
-#ifdef UPB_GOOGLE3
-  // Handlers for types/features only included in internal proto2 release:
-  // Cord, StringPiece, LazyField, and MessageSet.
-  // TODO(haberman): LazyField, MessageSet.
-
-  // Cord //////////////////////////////////////////////////////////////////////
-
-  // Returns the accessor vtable for string fields stored as Cord.
-  static AccessorVTable *GetForCord() {
-    static upb_accessor_vtbl cord_vtbl = {
-      NULL,         // StartSubMessage handler
-      &SetCord,     // Value handler
-      &PushOffset,  // StartSequence handler
-      NULL,         // StartRepeatedSubMessage handler
-      &AppendCord,  // Repeated value handler
-      NULL, NULL, NULL, NULL, NULL, NULL
-    };
-    return &cord_vtbl;
-  }
-
-  // Value handler for a singular Cord field: locates the Cord inside message
-  // m and fills it from the byte region carried in val.
-  static Flow SetCord(void *m, Value fval, Value val) {
-    void* storage = GetFieldPointer(m, GetValue<const FieldDef*>(fval));
-    Cord* destination = static_cast<Cord*>(storage);
-    AssignToCord(GetValue<ByteRegion*>(val), destination);
-    return UPB_CONTINUE;
-  }
-
-  // Repeated-value handler for Cord fields: treats the closure as a
-  // RepeatedField<Cord>, adds one element and fills it from the byte region.
-  static Flow AppendCord(void *_r, Value fval, Value val) {
-    // fval is unused here; silence the unused-parameter warning the same way
-    // the other append handlers in this class do.
-    (void)fval;
-    RepeatedField<Cord>* r = static_cast<RepeatedField<Cord>*>(_r);
-    AssignToCord(GetValue<ByteRegion*>(val), r->Add());
-    return UPB_CONTINUE;
-  }
-
-  // StringPiece ///////////////////////////////////////////////////////////////
-
-  // Returns the accessor vtable for string fields stored as StringPieceField.
-  static AccessorVTable *GetForStringPiece() {
-    static upb_accessor_vtbl string_piece_vtbl = {
-      NULL,                // StartSubMessage handler
-      &SetStringPiece,     // Value handler
-      &PushOffset,         // StartSequence handler
-      NULL,                // StartRepeatedSubMessage handler
-      &AppendStringPiece,  // Repeated value handler
-      NULL, NULL, NULL, NULL, NULL, NULL
-    };
-    return &string_piece_vtbl;
-  }
-
-  // Copies the bytes of region r into the StringPieceField f by way of a
-  // temporary heap buffer.
-  static void AssignToStringPieceField(const ByteRegion* r,
-                                       proto2::internal::StringPieceField* f) {
-    // TODO(haberman): alias if possible and enabled on the input stream.
-    // TODO(haberman): add a method to StringPieceField that lets us avoid
-    // this copy/malloc/free.
-    char *scratch = new char[r->Length()];
-    r->Copy(r->start_ofs(), r->Length(), scratch);
-    const StringPiece contents(scratch, r->Length());
-    f->CopyFrom(contents);
-    delete[] scratch;
-  }
-
-  // Value handler for a singular StringPieceField: locates the field inside
-  // message m and fills it from the byte region carried in val.
-  static Flow SetStringPiece(void *m, Value fval, Value val) {
-    void* storage = GetFieldPointer(m, GetValue<const FieldDef*>(fval));
-    StringPieceField* destination = static_cast<StringPieceField*>(storage);
-    AssignToStringPieceField(GetValue<ByteRegion*>(val), destination);
-    return UPB_CONTINUE;
-  }
-
-  // Repeated-value handler for StringPieceField: treats the closure as a
-  // RepeatedPtrField<StringPieceField>, adds one element and fills it from
-  // the byte region.
-  static Flow AppendStringPiece(void* _r, Value fval, Value val) {
-    // fval is unused here; silence the unused-parameter warning the same way
-    // the other append handlers in this class do.
-    (void)fval;
-    RepeatedPtrField<StringPieceField>* r =
-        static_cast<RepeatedPtrField<StringPieceField>*>(_r);
-    AssignToStringPieceField(GetValue<ByteRegion*>(val), r->Add());
-    return UPB_CONTINUE;
-  }
-
-#endif  // UPB_GOOGLE3
-};
-
-#ifdef UPB_GOOGLE3
-
-// Proto1 accessor -- only needed inside Google.
-// Installs upb accessors for fields of messages whose reflection object is a
-// _pi::Proto2Reflection (the proto1 case).  All members are static; the class
-// is used purely as a namespace for the handler functions and their vtables.
-class Proto1FieldAccessor {
- public:
-  // Returns true if we were able to set an accessor and any other properties
-  // of the FieldDef that are necessary to read/write this field to a
-  // proto2::Message.
-  static bool TrySet(const FieldDescriptor* proto2_f,
-                     const upb::MessageDef* md,
-                     upb::FieldDef* upb_f) {
-    const Message* m = static_cast<const Message*>(md->prototype);
-    const proto2::Reflection* base_r = m->GetReflection();
-    // Only messages backed by _pi::Proto2Reflection are handled here; for
-    // anything else the cast yields NULL and we report failure.
-    const _pi::Proto2Reflection* r =
-        dynamic_cast<const _pi::Proto2Reflection*>(base_r);
-    if (!r) return false;
-    // Extensions not supported yet.
-    if (proto2_f->is_extension()) return false;
-
-    const _pi::Field* f = r->GetFieldLayout(proto2_f);
-
-    if (f->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK) {
-      // Override the BYTES type that proto2 descriptors have for weak fields.
-      upb_f->set_type(UPB_TYPE(MESSAGE));
-    }
-
-    if (upb_f->IsSubmessage()) {
-      // Record the submessage type by name and remember its prototype, which
-      // the submessage handlers below use to create new instances.
-      const Message* prototype = upb::GetPrototypeForField(*m, proto2_f);
-      upb_f->set_subtype_name(prototype->GetDescriptor()->full_name());
-      upb_f->prototype = prototype;
-    }
-
-    upb_f->set_accessor(GetForCrep(f->crep));
-    upb_f->set_hasbit(GetHasbit(proto2_f, r));
-    upb_f->set_offset(GetOffset(proto2_f, r));
-    return true;
-  }
-
- private:
-  // Returns the hasbit index for field f, or -1 for repeated fields.
-  static int16_t GetHasbit(const FieldDescriptor* f,
-                           const _pi::Proto2Reflection* r) {
-    if (f->is_repeated()) {
-      // proto1 does not store hasbits for repeated fields.
-      return -1;
-    } else {
-      // NOTE(review): the "* 8" suggests has_bit_offset is a byte offset being
-      // converted to a bit index -- confirm against the layout definition.
-      return (r->layout_->has_bit_offset * 8) + r->GetFieldLayout(f)->has_index;
-    }
-  }
-
-  // Returns the offset recorded for field f in the reflection's field layout.
-  static uint16_t GetOffset(const FieldDescriptor* f,
-                            const _pi::Proto2Reflection* r) {
-    return r->GetFieldLayout(f)->offset;
-  }
-
-  // Maps a proto1 "crep" (C representation) code to the accessor vtable that
-  // knows how to write that representation.  Unknown codes assert and, when
-  // asserts are compiled out, return NULL.
-  static AccessorVTable *GetForCrep(int crep) {
-    // Expands to the REQUIRED/OPTIONAL/REPEATED case labels for one primitive
-    // type, all of which share the same Get<type_name>() vtable.
-#define PRIMITIVE(name, type_name) \
-    case _pi::CREP_REQUIRED_ ## name: \
-    case _pi::CREP_OPTIONAL_ ## name: \
-    case _pi::CREP_REPEATED_ ## name: return Get<type_name>();
-
-    switch (crep) {
-      PRIMITIVE(DOUBLE,   double);
-      PRIMITIVE(FLOAT,    float);
-      PRIMITIVE(INT64,    int64_t);
-      PRIMITIVE(UINT64,   uint64_t);
-      PRIMITIVE(INT32,    int32_t);
-      PRIMITIVE(FIXED64,  uint64_t);
-      PRIMITIVE(FIXED32,  uint32_t);
-      PRIMITIVE(BOOL,     bool);
-      case _pi::CREP_REQUIRED_STRING:
-      case _pi::CREP_OPTIONAL_STRING:
-      case _pi::CREP_REPEATED_STRING: return GetForString();
-      case _pi::CREP_OPTIONAL_OUTOFLINE_STRING: return GetForOutOfLineString();
-      case _pi::CREP_REQUIRED_CORD:
-      case _pi::CREP_OPTIONAL_CORD:
-      case _pi::CREP_REPEATED_CORD: return GetForCord();
-      case _pi::CREP_REQUIRED_GROUP:
-      case _pi::CREP_REQUIRED_FOREIGN:
-      case _pi::CREP_REQUIRED_FOREIGN_PROTO2: return GetForRequiredMessage();
-      case _pi::CREP_OPTIONAL_GROUP:
-      case _pi::CREP_REPEATED_GROUP:
-      case _pi::CREP_OPTIONAL_FOREIGN:
-      case _pi::CREP_REPEATED_FOREIGN:
-      case _pi::CREP_OPTIONAL_FOREIGN_PROTO2:
-      case _pi::CREP_REPEATED_FOREIGN_PROTO2: return GetForMessage();
-      case _pi::CREP_OPTIONAL_FOREIGN_WEAK: return GetForWeakMessage();
-      default: assert(false); return NULL;
-    }
-#undef PRIMITIVE
-  }
-
-  // PushOffset handler (used for StartSequence and others)  ///////////////////
-
-  // We can find a RepeatedField* or a RepeatedPtrField* at f->offset().
-  static SubFlow PushOffset(void *m, Value fval) {
-    const FieldDef *f = GetValue<const FieldDef*>(fval);
-    return UPB_CONTINUE_WITH(GetFieldPointer(m, f));
-  }
-
-  // Primitive Value (numeric, enum, bool) /////////////////////////////////////
-
-  // Returns the vtable for primitive fields of C++ type T (one static table
-  // per T).
-  template <typename T> static AccessorVTable *Get() {
-    static upb_accessor_vtbl vtbl = {
-      NULL,  // StartSubMessage handler
-      GetValueHandler<T>(),
-      &PushOffset,  // StartSequence handler
-      NULL,  // StartRepeatedSubMessage handler
-      &Append<T>,
-      NULL, NULL, NULL, NULL, NULL, NULL};
-    return &vtbl;
-  }
-
-  // Appends one primitive value to the repeated field passed as the closure.
-  template <typename T>
-  static Flow Append(void *_r, Value fval, Value val) {
-    (void)fval;
-    // Proto1's ProtoArray class derives from RepeatedField.
-    RepeatedField<T>* r = static_cast<RepeatedField<T>*>(_r);
-    r->Add(GetValue<T>(val));
-    return UPB_CONTINUE;
-  }
-
-  // String ////////////////////////////////////////////////////////////////////
-
-  // Returns the vtable for string fields that are stored inline in the
-  // message (SetString writes directly to a string at the field pointer).
-  static AccessorVTable *GetForString() {
-    static upb_accessor_vtbl vtbl = {
-      NULL,  // StartSubMessage handler
-      &SetString,
-      &PushOffset,  // StartSequence handler
-      NULL,  // StartRepeatedSubMessage handler
-      &AppendString,
-      NULL, NULL, NULL, NULL, NULL, NULL};
-    return &vtbl;
-  }
-
-  // Writes the byte region in val into the string located at the field
-  // pointer of message m.
-  static Flow SetString(void *m, Value fval, Value val) {
-    const FieldDef* f = GetValue<const FieldDef*>(fval);
-    string *str = static_cast<string*>(GetFieldPointer(m, f));
-    GetValue<ByteRegion*>(val)->AssignToString(str);
-    return UPB_CONTINUE;
-  }
-
-  // Adds one string to the repeated field passed as the closure and fills it
-  // from the byte region in val.
-  static Flow AppendString(void *_r, Value fval, Value val) {
-    (void)fval;
-    RepeatedPtrField<string>* r = static_cast<RepeatedPtrField<string>*>(_r);
-    GetValue<ByteRegion*>(val)->AssignToString(r->Add());
-    return UPB_CONTINUE;
-  }
-
-  // Out-of-line string ////////////////////////////////////////////////////////
-
-  // Returns the vtable for strings that the message holds through a pointer.
-  // Only the value handler is set.
-  static AccessorVTable *GetForOutOfLineString() {
-    static upb_accessor_vtbl vtbl = {
-      NULL, &SetOutOfLineString,
-      // This type is only used for non-repeated string fields.
-      NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
-    return &vtbl;
-  }
-
-  // Writes the byte region in val into the string the field points to,
-  // allocating a new string first if the field still points at the shared
-  // empty string.
-  static Flow SetOutOfLineString(void *m, Value fval, Value val) {
-    const FieldDef* f = GetValue<const FieldDef*>(fval);
-    string **str = static_cast<string**>(GetFieldPointer(m, f));
-    if (*str == &::ProtocolMessage::___empty_internal_proto_string_)
-      *str = new string();
-    GetValue<ByteRegion*>(val)->AssignToString(*str);
-    return UPB_CONTINUE;
-  }
-
-  // Cord //////////////////////////////////////////////////////////////////////
-
-  // Returns the vtable for string fields stored as Cord.
-  static AccessorVTable *GetForCord() {
-    static upb_accessor_vtbl vtbl = {
-      NULL,  // StartSubMessage handler
-      &SetCord,
-      &PushOffset,  // StartSequence handler
-      NULL,  // StartRepeatedSubMessage handler
-      &AppendCord,
-      NULL, NULL, NULL, NULL, NULL, NULL};
-    return &vtbl;
-  }
-
-  // Fills the Cord located at the field pointer of message m from the byte
-  // region in val.
-  static Flow SetCord(void *m, Value fval, Value val) {
-    const FieldDef* f = GetValue<const FieldDef*>(fval);
-    Cord* field = static_cast<Cord*>(GetFieldPointer(m, f));
-    AssignToCord(GetValue<ByteRegion*>(val), field);
-    return UPB_CONTINUE;
-  }
-
-  // Adds one Cord to the repeated field passed as the closure and fills it
-  // from the byte region in val.
-  // NOTE(review): fval is unused but, unlike the other append handlers, is not
-  // silenced with (void)fval.
-  static Flow AppendCord(void *_r, Value fval, Value val) {
-    RepeatedField<Cord>* r = static_cast<RepeatedField<Cord>*>(_r);
-    AssignToCord(GetValue<ByteRegion*>(val), r->Add());
-    return UPB_CONTINUE;
-  }
-
-  // SubMessage ////////////////////////////////////////////////////////////////
-
-  // Returns the vtable for required submessages.  StartSubMessage is simply
-  // PushOffset, i.e. the field pointer itself is used as the submessage.
-  static AccessorVTable *GetForRequiredMessage() {
-    static upb_accessor_vtbl vtbl = {
-      &PushOffset,  // StartSubMessage handler
-      NULL,  // Value handler
-      &PushOffset,  // StartSequence handler
-      &StartRepeatedSubMessage,
-      NULL,  // Repeated value handler
-      NULL, NULL, NULL, NULL, NULL, NULL};
-    return &vtbl;
-  }
-
-  // Returns the vtable for weak submessage fields, which differ from regular
-  // ones only in the StartSubMessage handler.
-  static AccessorVTable *GetForWeakMessage() {
-    static upb_accessor_vtbl vtbl = {
-      &StartWeakSubMessage,  // StartSubMessage handler
-      NULL,  // Value handler
-      &PushOffset,  // StartSequence handler
-      &StartRepeatedSubMessage,
-      NULL,  // Repeated value handler
-      NULL, NULL, NULL, NULL, NULL, NULL};
-    return &vtbl;
-  }
-
-  // Returns the vtable for optional and repeated submessage fields.
-  static AccessorVTable *GetForMessage() {
-    static upb_accessor_vtbl vtbl = {
-      &StartSubMessage,
-      NULL,  // Value handler
-      &PushOffset,  // StartSequence handler
-      &StartRepeatedSubMessage,
-      NULL,  // Repeated value handler
-      NULL, NULL, NULL, NULL, NULL, NULL};
-    return &vtbl;
-  }
-
-  // Starts a singular submessage.  If the field still points at the
-  // prototype, a new instance is created from it and stored in the field.
-  static SubFlow StartSubMessage(void *m, Value fval) {
-    const FieldDef* f = GetValue<const FieldDef*>(fval);
-    Message **subm = static_cast<Message**>(GetFieldPointer(m, f));
-    if (*subm == f->prototype) *subm = (*subm)->New();
-    return UPB_CONTINUE_WITH(*subm);
-  }
-
-  // Starts a weak submessage.  Here an unset field is NULL rather than a
-  // pointer to the prototype, so the new instance is created from
-  // f->prototype.
-  static SubFlow StartWeakSubMessage(void *m, Value fval) {
-    const FieldDef* f = GetValue<const FieldDef*>(fval);
-    Message **subm = static_cast<Message**>(GetFieldPointer(m, f));
-    if (*subm == NULL) {
-      const Message* prototype = static_cast<const Message*>(f->prototype);
-      *subm = prototype->New();
-    }
-    return UPB_CONTINUE_WITH(*subm);
-  }
-
-  // Type handler passed to AddFromCleared()/AddAllocated() below so that the
-  // repeated field's elements can be handled as untyped (void) objects.
-  class RepeatedMessageTypeHandler {
-   public:
-    typedef void Type;
-    // AddAllocated() calls this, but only if other objects are sitting
-    // around waiting for reuse, which we will not do.
-    static void Delete(Type* t) {
-      (void)t;
-      assert(false);
-    }
-  };
-
-  // Closure is a RepeatedPtrField<SubMessageType>*, but we access it through
-  // its base class RepeatedPtrFieldBase*.
-  static SubFlow StartRepeatedSubMessage(void* _r, Value fval) {
-    const FieldDef* f = GetValue<const FieldDef*>(fval);
-    RepeatedPtrFieldBase *r = static_cast<RepeatedPtrFieldBase*>(_r);
-    // Reuse a cleared element if the container has one; otherwise create a
-    // new submessage from the prototype and add it.
-    void *submsg = r->AddFromCleared<RepeatedMessageTypeHandler>();
-    if (!submsg) {
-      const Message* prototype = static_cast<const Message*>(f->prototype);
-      submsg = prototype->New();
-      r->AddAllocated<RepeatedMessageTypeHandler>(submsg);
-    }
-    return UPB_CONTINUE_WITH(submsg);
-  }
-};
-
-#endif
-
-}  // namespace proto2_bridge_{google3,opensource}
-
-// Returns the prototype (default instance) for m's message type.  In debug
-// builds, asserts that the prototype was found and that it shares m's
-// reflection object.
-static const Message* GetPrototypeForMessage(const Message& m) {
-  // A non-NULL factory means a proto2 generated message or DynamicMessage.
-  MessageFactory* factory = FieldAccessor::GetMessageFactory(m);
-  if (!factory) {
-    // Proto1 message; since proto1 has no dynamic message, it must be
-    // from the generated factory.
-    factory = MessageFactory::generated_factory();
-  }
-  const Message* prototype = factory->GetPrototype(m.GetDescriptor());
-  // If NULL on the proto1 path, then it wasn't a proto1 message and we can't
-  // handle it.
-  assert(prototype);
-  assert(prototype->GetReflection() == m.GetReflection());
-  return prototype;
-}
-
-// Returns the prototype for the submessage type of field f of message m.
-static const Message* GetPrototypeForField(const Message& m,
-                                           const FieldDescriptor* f) {
-#ifdef UPB_GOOGLE3
-  // Non-NULL only when m uses proto1 reflection.
-  const _pi::Proto2Reflection* proto1_r =
-      dynamic_cast<const _pi::Proto2Reflection*>(m.GetReflection());
-  if (f->type() == FieldDescriptor::TYPE_BYTES) {
-    // Proto1 weak field: the proto2 descriptor says their type is BYTES.
-    assert(proto1_r);
-    const _pi::Field* layout = proto1_r->GetFieldLayout(f);
-    assert(layout->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK);
-    const Message* weak_default =
-        static_cast<const Message*>(layout->weak_layout()->default_instance);
-    return GetPrototypeForMessage(*weak_default);
-  }
-  if (proto1_r) {
-    // Proto1 message; since proto1 has no dynamic message, it must be from
-    // the generated factory.
-    const Message* generated =
-        MessageFactory::generated_factory()->GetPrototype(f->message_type());
-    assert(generated);
-    return generated;
-  }
-#endif
-  assert(f->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE);
-  // We assume that all submessages (and extensions) will be constructed using
-  // the same MessageFactory as this message.  This doesn't cover the case of
-  // CodedInputStream::SetExtensionRegistry().
-  MessageFactory* factory = FieldAccessor::GetMessageFactory(m);
-  assert(factory);  // If neither proto1 nor proto2 we can't handle it.
-  const Message* prototype = factory->GetPrototype(f->message_type());
-  assert(prototype);
-  return prototype;
-}
-
-namespace proto2_bridge {
-
-// Builds a upb::FieldDef mirroring the proto2 FieldDescriptor f (number,
-// name, label, type, accessor and default value) and adds it to md.  Returns
-// the new FieldDef, or NULL if md->AddField() rejects it.
-upb::FieldDef* AddFieldDef(const FieldDescriptor* f, upb::MessageDef* md) {
-  // The address of the local pointer serves as the owner token, both here and
-  // in the AddField() call at the end.
-  upb::FieldDef* upb_f = upb::FieldDef::New(&upb_f);
-  upb_f->set_number(f->number());
-  upb_f->set_name(f->name());
-  upb_f->set_label(static_cast<upb::Label>(f->label()));
-  upb_f->set_type(static_cast<upb::FieldType>(f->type()));
-
-  // Try the proto2 accessor first, then (inside Google only) the proto1 one.
-  bool have_accessor = FieldAccessor::TrySet(f, md, upb_f);
-#ifdef UPB_GOOGLE3
-  if (!have_accessor) {
-    have_accessor =
-        proto2_bridge_google3::Proto1FieldAccessor::TrySet(f, md, upb_f);
-  }
-#endif
-  if (!have_accessor) {
-    // Unsupported reflection class.
-    assert(false);
-  }
-
-  if (upb_f->type() != UPB_TYPE(ENUM)) {
-    // Set field default for primitive types.  Need to switch on the upb type
-    // rather than the proto2 type, because upb_f->type() may have been changed
-    // from BYTES to MESSAGE for a weak field.
-    switch (upb_types[upb_f->type()].inmemory_type) {
-      case UPB_CTYPE_INT32:
-        upb_f->set_default(MakeValue(f->default_value_int32()));
-        break;
-      case UPB_CTYPE_INT64: {
-        const int64_t dflt = static_cast<int64_t>(f->default_value_int64());
-        upb_f->set_default(MakeValue(dflt));
-        break;
-      }
-      case UPB_CTYPE_UINT32:
-        upb_f->set_default(MakeValue(f->default_value_uint32()));
-        break;
-      case UPB_CTYPE_UINT64: {
-        const uint64_t dflt = static_cast<uint64_t>(f->default_value_uint64());
-        upb_f->set_default(MakeValue(dflt));
-        break;
-      }
-      case UPB_CTYPE_DOUBLE:
-        upb_f->set_default(MakeValue(f->default_value_double()));
-        break;
-      case UPB_CTYPE_FLOAT:
-        upb_f->set_default(MakeValue(f->default_value_float()));
-        break;
-      case UPB_CTYPE_BOOL:
-        upb_f->set_default(MakeValue(f->default_value_bool()));
-        break;
-      case UPB_CTYPE_BYTEREGION:
-        upb_f->set_default(f->default_value_string());
-        break;
-    }
-  } else {
-    // We set the enum default symbolically.
-    upb_f->set_default(f->default_value_enum()->name());
-    upb_f->set_subtype_name(f->enum_type()->full_name());
-  }
-
-  if (!md->AddField(upb_f, &upb_f)) return NULL;
-  return upb_f;
-}
-
-// Creates a field-less MessageDef named after m's message type and records
-// the prototype for that type on it.
-upb::MessageDef *NewEmptyMessageDef(const Message& m, void *owner) {
-  upb::MessageDef *empty_def = upb::MessageDef::New(owner);
-  empty_def->set_full_name(m.GetDescriptor()->full_name());
-  empty_def->prototype = GetPrototypeForMessage(m);
-  return empty_def;
-}
-
-// Creates an EnumDef with the same full name and (name, number) pairs as the
-// given EnumDescriptor.
-upb::EnumDef* NewEnumDef(const EnumDescriptor* desc, void *owner) {
-  upb::EnumDef* enum_def = upb::EnumDef::New(owner);
-  enum_def->set_full_name(desc->full_name());
-  for (int index = 0; index < desc->value_count(); ++index) {
-    const EnumValueDescriptor* entry = desc->value(index);
-    const bool added = enum_def->AddValue(entry->name(), entry->number());
-    assert(added);
-    (void)added;  // Only read by the assert above.
-  }
-  return enum_def;
-}
-
-// Adds a FieldDef to md for every field of the message type behind
-// md->prototype, skipping extensions (and, inside Google, lazy fields).  The
-// result of each AddFieldDef() call is not checked.
-void AddAllFields(upb::MessageDef* md) {
-  const Message* prototype = static_cast<const Message*>(md->prototype);
-  const Descriptor* d = prototype->GetDescriptor();
-  for (int i = 0; i < d->field_count(); ++i) {
-    const FieldDescriptor* field = d->field(i);
-#ifdef UPB_GOOGLE3
-    // Skip lazy fields for now since we can't properly handle them.
-    if (field->options().lazy()) continue;
-#endif
-    // Extensions not supported yet.
-    if (field->is_extension()) continue;
-    AddFieldDef(field, md);
-  }
-}
-
-// Creates a MessageDef for m's type and populates it via AddAllFields().
-upb::MessageDef *NewFullMessageDef(const Message& m, void *owner) {
-  upb::MessageDef* full_def = NewEmptyMessageDef(m, owner);
-  // TODO(haberman): add unknown field handler and extensions.
-  AddAllFields(full_def);
-  return full_def;
-}
-
-typedef std::map<std::string, upb::Def*> SymbolMap;
-
-// Builds the MessageDef for m and, recursively, defs for every enum and
-// submessage type reachable from it.  Each def created is registered in
-// *symbols under its full name, so every type is built only once.
-static upb::MessageDef* NewFinalMessageDefHelper(const Message& m, void *owner,
-                                                 SymbolMap* symbols) {
-  upb::MessageDef* md = NewFullMessageDef(m, owner);
-  // Must do this before processing submessages to prevent infinite recursion.
-  (*symbols)[std::string(md->full_name())] = md->AsDef();
-
-  for (upb::MessageDef::Iterator i(md); !i.Done(); i.Next()) {
-    upb::FieldDef* f = i.field();
-    if (!f->HasSubDef()) continue;
-
-    upb::Def* subdef;
-    SymbolMap::iterator known = symbols->find(f->subtype_name());
-    if (known == symbols->end()) {
-      // First time we see this subtype: build its def now.
-      const FieldDescriptor* proto2_f =
-          m.GetDescriptor()->FindFieldByNumber(f->number());
-      if (f->type() == UPB_TYPE(ENUM)) {
-        subdef = NewEnumDef(proto2_f->enum_type(), owner)->AsDef();
-        (*symbols)[std::string(subdef->full_name())] = subdef;
-      } else {
-        assert(f->IsSubmessage());
-        const Message* prototype = GetPrototypeForField(m, proto2_f);
-        // The recursive call registers the new def in *symbols itself.
-        subdef = NewFinalMessageDefHelper(*prototype, owner, symbols)->AsDef();
-      }
-    } else {
-      subdef = known->second;
-    }
-    f->set_subdef(subdef);
-  }
-  return md;
-}
-
-// Builds defs for m's whole type tree, finalizes them together, and returns
-// the top-level MessageDef.  The refs taken with `owner` on all other defs
-// are released before returning.
-const upb::MessageDef* NewFinalMessageDef(const Message& m, void *owner) {
-  SymbolMap symbols;
-  upb::MessageDef* top = NewFinalMessageDefHelper(m, owner, &symbols);
-
-  // Collect every def that was created so they can be finalized as a group.
-  std::vector<upb::Def*> defs;
-  for (SymbolMap::iterator entry = symbols.begin(); entry != symbols.end();
-       ++entry) {
-    defs.push_back(entry->second);
-  }
-
-  Status status;
-  const bool finalized = Def::Finalize(defs, &status);
-  assert(finalized);
-  (void)finalized;  // Only read by the assert above.
-
-  // Unref all defs except the top-level one that we are returning.
-  for (std::vector<upb::Def*>::size_type i = 0; i < defs.size(); ++i) {
-    if (defs[i] == top->AsDef()) continue;
-    defs[i]->Unref(owner);
-  }
-
-  return top;
-}
-
-}  // namespace proto2_bridge
-}  // namespace upb
--- a/bindings/cpp/upb/proto2_bridge.hpp
+++ b/bindings/cpp/upb/proto2_bridge.hpp
@ -1,170 +0,0 @@
-//
-// upb - a minimalist implementation of protocol buffers.
-//
-// Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
-// Author: Josh Haberman <jhaberman@gmail.com>
-//
-// A bridge between upb and proto2, allows populating proto2 generated
-// classes using upb's parser, translating between descriptors and defs, etc.
-//
-// This is designed to be able to be compiled against either the open-source
-// version of protocol buffers or the Google-internal proto2.  The two are
-// the same in most ways, but live in different namespaces (proto2 vs
-// google::protobuf) and have a few other more minor differences.
-//
-// The bridge gives you a lot of control over which fields will be written to
-// the message (fields that are not written will just be skipped), and whether
-// unknown fields are written to the UnknownFieldSet.  This can save a lot of
-// work if the client only cares about some subset of the fields.
-//
-// Example usage:
-//
-//   // Build a def that will have all fields and parse just like proto2 would.
-//   // (`owner` is the caller-chosen token that holds the ref on the result.)
-//   const upb::MessageDef* md =
-//       upb::proto2_bridge::NewFinalMessageDef(MyProto(), &owner);
-//
-//   // JIT the parser; should only be done once ahead-of-time.
-//   upb::Handlers* handlers = upb::NewHandlersForMessage(md);
-//   upb::DecoderPlan* plan = upb::DecoderPlan::New(handlers);
-//   handlers->Unref();
-//
-//   // The actual parsing.
-//   MyProto proto;
-//   upb::Decoder decoder;
-//   upb::StringSource source(buf, len);
-//   decoder.ResetPlan(plan, 0);
-//   decoder.ResetInput(source.AllBytes(), &proto);
-//   CHECK(decoder.Decode() == UPB_OK) << decoder.status();
-//
-// To parse only one field and skip all others:
-//
-//   const upb::MessageDef* md =
-//       upb::proto2_bridge::NewEmptyMessageDef(MyProto().GetPrototype());
-//   upb::proto2_bridge::AddFieldDef(
-//       MyProto::descriptor()->FindFieldByName("my_field"), md);
-//   upb::Finalize(md);
-//
-//   // Now continue with "JIT the parser" from above.
-//
-// Note that there is currently no support for
-// CodedInputStream::SetExtensionRegistry(), which allows specifying a separate
-// DescriptorPool and MessageFactory for extensions.  Since this is a property
-// of the input in proto2, it's difficult to build a plan ahead-of-time that
-// can properly support this.  If it's an important use case, the caller should
-// probably build a upb plan explicitly.
-
-#ifndef UPB_PROTO2_BRIDGE
-#define UPB_PROTO2_BRIDGE
-
-#include <vector>
-
-// Forward declarations of the open-source protobuf types used in the
-// signatures below, so this header does not need to include protobuf headers.
-namespace google {
-namespace protobuf {
-class Descriptor;
-class EnumDescriptor;
-class FieldDescriptor;
-class FileDescriptor;
-class Message;
-}  // namespace protobuf
-}  // namespace google
-
-// The same set of types in the Google-internal proto2 namespace.
-namespace proto2 {
-class Descriptor;
-class EnumDescriptor;
-class FieldDescriptor;
-class FileDescriptor;
-class Message;
-}  // namespace proto2
-
-
-namespace upb {
-
-// Forward declarations of the upb types used in the signatures below.
-// EnumDef is declared here as well because NewEnumDef() returns it; without
-// this the header only compiles if another header declared it first.
-class Def;
-class EnumDef;
-class FieldDef;
-class MessageDef;
-
-namespace proto2_bridge {
-
-// Unfinalized defs ////////////////////////////////////////////////////////////
-
-// Creation of UNFINALIZED defs.  All of these functions return defs that are
-// still mutable and have not been finalized.  They must be finalized before
-// using them to parse anything.  This is useful if you want more control over
-// the process of constructing defs, e.g. to add the specific set of fields you
-// care about.
-
-// Creates a new upb::MessageDef that corresponds to the type in the given
-// prototype message.  The MessageDef will not have any fields added to it.
-upb::MessageDef *NewEmptyMessageDef(const proto2::Message& m, void *owner);
-upb::MessageDef *NewEmptyMessageDef(const google::protobuf::Message& desc,
-                                    void *owner);
-
-// Adds a new upb::FieldDef to the given MessageDef corresponding to the given
-// FieldDescriptor.  The FieldDef will be given an accessor and offset so that
-// it can be used to read and write data into the proto2::Message classes.
-// The given MessageDef must have been constructed with NewEmptyMessageDef()
-// and f->containing_type() must correspond to the message that was used.
-//
-// Any submessage, group, or enum fields will be given symbolic references to
-// the subtype, which must be resolved before the MessageDef can be finalized.
-//
-// On success, returns the FieldDef that was added (caller does not own a ref).
-// If an existing field had the same name or number, returns NULL.
-upb::FieldDef* AddFieldDef(const proto2::FieldDescriptor* f,
-                           upb::MessageDef* md);
-upb::FieldDef* AddFieldDef(const google::protobuf::FieldDescriptor* f,
-                           upb::MessageDef* md);
-
-// Given a MessageDef that was constructed with NewEmptyMessageDef(), adds
-// FieldDefs for all fields defined in the original message, but not for any
-// extensions or unknown fields.  The given MessageDef must not have any fields
-// that have the same name or number as any of the fields we are adding (the
-// easiest way to guarantee this is to start with an empty MessageDef).
-//
-// Note: this function returns void, so a field that could not be added is
-// not reported to the caller.
-void AddAllFields(upb::MessageDef* md);
-
-// TODO(haberman): Add:
-// // Adds a handler that will store unknown fields in the UnknownFieldSet.
-// void AddUnknownFieldHandler(upb::MessageDef* md);
-
-// Returns a new upb::MessageDef that contains handlers for all fields, unknown
-// fields, and any extensions in the descriptor's pool.  The resulting
-// def/handlers should be equivalent to the generated code constructed by the
-// protobuf compiler (or the code in DynamicMessage) for the given type.
-// The subdefs for message/enum fields (if any) will be referenced symbolically,
-// and will need to be resolved before being finalized.
-//
-// TODO(haberman): Add missing support (LazyField, MessageSet, and extensions).
-//
-// TODO(haberman): possibly add a similar function that lets you supply a
-// separate DescriptorPool and MessageFactory for extensions, to support
-// proto2's io::CodedInputStream::SetExtensionRegistry().
-upb::MessageDef* NewFullMessageDef(const proto2::Message& m, void *owner);
-upb::MessageDef* NewFullMessageDef(const google::protobuf::Message& m,
-                                   void *owner);
-
-// Returns a new upb::EnumDef that corresponds to the given EnumDescriptor.
-// Caller owns a ref on the returned EnumDef.
-upb::EnumDef* NewEnumDef(const proto2::EnumDescriptor* desc, void *owner);
-upb::EnumDef* NewEnumDef(const google::protobuf::EnumDescriptor* desc,
-                         void *owner);
-
-// Finalized defs //////////////////////////////////////////////////////////////
-
-// These functions return FINALIZED defs, meaning that they are immutable and
-// ready for use.  Since they are immutable you cannot make any further changes
-// to e.g. the set of fields, but these functions are more convenient if you
-// simply want to parse a message exactly how the built-in proto2 parser would.
-
-// Creates and returns a finalized MessageDef for the given message and its
-// entire type tree that will include all fields and unknown handlers (i.e. it
-// will parse just like proto2 would).
-const upb::MessageDef* NewFinalMessageDef(const proto2::Message& m,
-                                          void *owner);
-const upb::MessageDef* NewFinalMessageDef(const google::protobuf::Message& m,
-                                          void *owner);
-
-}  // namespace proto2_bridge
-}  // namespace upb
-
-#endif
--- a/bindings/cpp/upb/upb.hpp
+++ b/bindings/cpp/upb/upb.hpp
@ -1,81 +0,0 @@
-//
-// upb - a minimalist implementation of protocol buffers.
-//
-// Copyright (c) 2011 Google Inc.  See LICENSE for details.
-// Author: Josh Haberman <jhaberman@gmail.com>
-
-#ifndef UPB_HPP
-#define UPB_HPP
-
-#include "upb/upb.h"
-#include <iostream>
-
-// UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(class_name): place inside a class body
-// to forbid default construction and destruction of that class.
-//
-// With GCC's C++0x mode (and unless UPB_NO_CXX11 is defined) the members are
-// explicitly deleted.  Otherwise they are only declared and never defined
-// here, which is the pre-C++11 way of making them unusable.
-#if defined(__GXX_EXPERIMENTAL_CXX0X__) && !defined(UPB_NO_CXX11)
-#define UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(class_name) \
-  class_name() = delete; \
-  ~class_name() = delete;
-#else
-#define UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(class_name) \
-  class_name(); \
-  ~class_name();
-#endif
-
-namespace upb {
-
-typedef upb_success_t Success;
-
-// C++ wrapper around the C upb_status struct: initializes it on construction,
-// uninitializes it on destruction, and forwards the rest to the C API.
-class Status : public upb_status {
- public:
-  Status() { upb_status_init(this); }
-  ~Status() { upb_status_uninit(this); }
-
-  // State queries.
-  bool ok() const { return upb_ok(this); }
-  bool eof() const { return upb_eof(this); }
-  const char *GetString() const { return upb_status_getstr(this); }
-
-  // State changes.
-  void SetEof() { upb_status_seteof(this); }
-  void SetErrorLiteral(const char* msg) { upb_status_seterrliteral(this, msg); }
-  void Clear() { upb_status_clear(this); }
-};
-
-typedef upb_value Value;
-
-// Typed conversions between C++ values and upb_value.  Only the
-// specializations generated below are defined.
-template <typename T> T GetValue(Value v);
-template <typename T> Value MakeValue(T v);
-
-// Defines GetValue<cpp_type>/MakeValue<cpp_type> in terms of the C functions
-// upb_value_get<suffix>() and upb_value_<suffix>().
-#define UPB_VALUE_ACCESSORS(suffix, cpp_type) \
-  template <> inline cpp_type GetValue<cpp_type>(Value v) { \
-    return upb_value_get ## suffix(v); \
-  } \
-  template <> inline Value MakeValue<cpp_type>(cpp_type v) { \
-    return upb_value_ ## suffix(v); \
-  }
-
-UPB_VALUE_ACCESSORS(double, double);
-UPB_VALUE_ACCESSORS(float,  float);
-UPB_VALUE_ACCESSORS(int32,  int32_t);
-UPB_VALUE_ACCESSORS(int64,  int64_t);
-UPB_VALUE_ACCESSORS(uint32, uint32_t);
-UPB_VALUE_ACCESSORS(uint64, uint64_t);
-UPB_VALUE_ACCESSORS(bool,   bool);
-
-#undef UPB_VALUE_ACCESSORS
-
-// Pointer payloads are stored as void* and cast back to T* on retrieval.
-template <typename T> inline T* GetPtrValue(Value v) {
-  void* raw = upb_value_getptr(v);
-  return static_cast<T*>(raw);
-}
-template <typename T> inline Value MakePtrValue(T* v) {
-  void* raw = static_cast<void*>(v);
-  return upb_value_ptr(raw);
-}
-
-// Streams the status message text.
-INLINE std::ostream& operator<<(std::ostream& out, const Status& status) {
-  return out << status.GetString();
-}
-
-}  // namespace upb
-
-#endif
--- a/bindings/linux/Makefile
+++ b/bindings/linux/Makefile
@ -1,7 +1,6 @@
 obj-m = upb.o

 upb-objs = \
-  setjmp.o \
  ../../upb/upb.o \
  ../../upb/bytestream.o \
  ../../upb/def.o \
@ -9,9 +8,6 @@ upb-objs = \
  ../../upb/table.o \
  ../../upb/refcount.o \
  ../../upb/msg.o \
-  ../../upb/pb/decoder.o \
-  ../../upb/pb/textprinter.o \
-  ../../upb/pb/varint.o \

 KVERSION = $(shell uname -r)

--- a/bindings/linux/ctype.h
+++ b/bindings/linux/ctype.h
@ -1,8 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2012 Google Inc.  See LICENSE for details.
- * Author: Josh Haberman <jhaberman@gmail.com>
- */
-
-#include <linux/ctype.h>
--- a/bindings/linux/inttypes.h
+++ b/bindings/linux/inttypes.h
@ -1,22 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2012 Google Inc.  See LICENSE for details.
- * Author: Josh Haberman <jhaberman@gmail.com>
- */
-
-#ifndef PRId64
-#define PRId64 "ld"
-#endif
-
-#ifndef PRIu64
-#define PRIu64 "lu"
-#endif
-
-#ifndef PRId32
-#define PRId32 "d"
-#endif
-
-#ifndef PRIu32
-#define PRIu32 "u"
-#endif
--- a/bindings/linux/setjmp.S
+++ b/bindings/linux/setjmp.S
@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2003 Peter Wemm.
- * Copyright (c) 1993 The Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-.globl _setjmp, _longjmp
-
-_setjmp:
-  movq    %rbx,0(%rdi)                    /* save rbx */
-  movq    %rsp,8(%rdi)                    /* save rsp */
-  movq    %rbp,16(%rdi)                   /* save rbp */
-  movq    %r12,24(%rdi)                   /* save r12 */
-  movq    %r13,32(%rdi)                   /* save r13 */
-  movq    %r14,40(%rdi)                   /* save r14 */
-  movq    %r15,48(%rdi)                   /* save r15 */
-  movq    0(%rsp),%rdx                    /* get rta */
-  movq    %rdx,56(%rdi)                   /* save rip */
-  xorl    %eax,%eax                       /* return(0); */
-  ret
-
-_longjmp:
-  movq    0(%rdi),%rbx                    /* restore rbx */
-  movq    8(%rdi),%rsp                    /* restore rsp */
-  movq    16(%rdi),%rbp                   /* restore rbp */
-  movq    24(%rdi),%r12                   /* restore r12 */
-  movq    32(%rdi),%r13                   /* restore r13 */
-  movq    40(%rdi),%r14                   /* restore r14 */
-  movq    48(%rdi),%r15                   /* restore r15 */
-  movq    56(%rdi),%rdx                   /* get rta */
-  movq    %rdx,0(%rsp)                    /* put in return frame */
-  xorl    %eax,%eax                       /* return(1); */
-  incl    %eax
-  ret
--- a/bindings/linux/setjmp.h
+++ b/bindings/linux/setjmp.h
@ -1,13 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2012 Google Inc.  See LICENSE for details.
- * Author: Josh Haberman <jhaberman@gmail.com>
- */
-
-// Linux doesn't provide setjmp/longjmp, boo.
-
-typedef void *jmp_buf[8];
-
-extern int _setjmp(jmp_buf env);
-__attribute__((__noreturn__)) extern void _longjmp(jmp_buf env, int val);
--- a/bindings/linux/string.h
+++ b/bindings/linux/string.h
@ -9,18 +9,5 @@
 #define UPB_LINUX_STRING_H_

 #include <linux/string.h>
-#include <stdlib.h>
-#include "upb/upb.h"  // For INLINE.
-
-INLINE char *strdup(const char *s) {
-  size_t len = strlen(s);
-  char *ret = malloc(len + 1);
-  if (ret == NULL) return NULL;
-  // Be particularly defensive and guard against buffer overflow if there
-  // is a concurrent mutator.
-  strncpy(ret, s, len);
-  ret[len] = '\0';
-  return ret;
-}

 #endif  /* UPB_DEF_H_ */
--- a/bindings/lua/LICENSE
+++ b/bindings/lua/LICENSE
@ -0,0 +1,32 @@
+
+Lunit License
+-------------
+
+Lunit is written by Michael Roth <mroth@nessie.de> and is licensed
+under the terms of the MIT license reproduced below.
+
+========================================================================
+
+Copyright (c) 2004-2010 Michael Roth <mroth@nessie.de>
+
+Permission is hereby granted, free of charge, to any person 
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without restriction,
+including without limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of the Software,
+and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be 
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+========================================================================
+
--- a/bindings/lua/lunitx/atexit.lua
+++ b/bindings/lua/lunitx/atexit.lua
@ -0,0 +1,32 @@
+
+-- Objects whose __gc metamethod is the registered callback.  They are kept
+-- referenced from this table so that they are not collected early.
+local actions = {}
+
+-- atexit(fn): registers fn as the finalizer of a fresh object stored in
+-- `actions`.  Presumably the finalizers then run when the Lua state shuts
+-- down; nothing in this file triggers them explicitly.
+local atexit
+
+if _VERSION >= 'Lua 5.2' then 
+
+    -- A plain table carries the __gc metamethod on this branch.
+    atexit = function (fn)
+        actions[#actions+1] = setmetatable({}, { __gc = fn })
+    end
+    
+else
+
+    local newproxy = newproxy
+    local debug = debug
+    local assert = assert
+    local setmetatable = setmetatable
+
+    -- On this branch the finalizer is attached to a newproxy() userdata
+    -- through debug.setmetatable instead of to a table.
+    local function gc(fn)
+        local p = assert(newproxy())
+        assert(debug.setmetatable(p, { __gc = fn }))
+        return p
+    end
+
+    atexit = function (fn)
+        actions[#actions+1] = gc(fn)
+    end
+    
+end
+
+return atexit
+
--- a/bindings/lua/lunitx/lunit.lua
+++ b/bindings/lua/lunitx/lunit.lua
@ -0,0 +1,725 @@
+--[[--------------------------------------------------------------------------
+
+    This file is part of lunit 0.5.
+
+    For Details about lunit look at: http://www.mroth.net/lunit/
+
+    Author: Michael Roth <mroth@nessie.de>
+
+    Copyright (c) 2004, 2006-2010 Michael Roth <mroth@nessie.de>
+
+    Permission is hereby granted, free of charge, to any person 
+    obtaining a copy of this software and associated documentation
+    files (the "Software"), to deal in the Software without restriction,
+    including without limitation the rights to use, copy, modify, merge,
+    publish, distribute, sublicense, and/or sell copies of the Software,
+    and to permit persons to whom the Software is furnished to do so,
+    subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be 
+    included in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+--]]--------------------------------------------------------------------------
+
+
+local orig_assert     = assert
+
+local pairs           = pairs
+local ipairs          = ipairs
+local next            = next
+local type            = type
+local error           = error
+local tostring        = tostring
+local setmetatable    = setmetatable
+local pcall           = pcall
+local xpcall          = xpcall
+local require         = require
+local loadfile        = loadfile
+
+local string_sub      = string.sub
+local string_gsub     = string.gsub
+local string_format   = string.format
+local string_lower    = string.lower
+local string_find     = string.find
+
+local table_concat    = table.concat
+
+local debug_getinfo   = debug.getinfo
+
+local _G = _G
+
+local lunit
+
+if _VERSION >= 'Lua 5.2' then 
+
+    lunit = {}
+    _ENV = lunit
+    
+else
+
+    module("lunit")
+    lunit = _M
+    
+end
+
+
+local __failure__ = {}    -- Type tag for failed assertions
+
+local typenames = { "nil", "boolean", "number", "string", "table", "function", "thread", "userdata" }
+
+
+local traceback_hide      -- Traceback function which hides lunit internals
+local mypcall             -- Protected call to a function with own traceback
+do
+  -- Weak-keyed set of functions whose stack frames are left out of the
+  -- tracebacks built by my_traceback().
+  local _tb_hide = setmetatable( {}, {__mode="k"} )
+
+  -- Marks func as a lunit internal that must not show up in tracebacks.
+  function traceback_hide(func)
+    _tb_hide[func] = true
+  end
+
+  -- Message handler handed to xpcall() by mypcall().
+  --  * For a failed assertion (a table tagged with __failure__) it records
+  --    "file:line" of the frame at stack level 5 in errobj.where.
+  --  * For any other error it wraps the value into { msg = ..., tb = {...} }
+  --    where tb lists the frames that are not registered in _tb_hide.
+  -- Note: is_table/is_function are not locals at this point (the local
+  -- aliases are declared further down), so they are looked up in the module
+  -- environment when the handler runs.
+  local function my_traceback(errobj)
+    if is_table(errobj) and errobj.type == __failure__ then
+      local info = debug_getinfo(5, "Sl")   -- FIXME: Hardcoded integers are bad...
+      errobj.where = string_format( "%s:%d", info.short_src, info.currentline)
+    else
+      errobj = { msg = tostring(errobj) }
+      errobj.tb = {}
+      local i = 2
+      while true do
+        local info = debug_getinfo(i, "Snlf")
+        -- debug.getinfo yields no table once the level is past the stack.
+        if not is_table(info) then
+          break
+        end
+        if not _tb_hide[info.func] then
+          local line = {}       -- Ripped from ldblib.c...
+          line[#line+1] = string_format("%s:", info.short_src)
+          if info.currentline > 0 then
+            line[#line+1] = string_format("%d:", info.currentline)
+          end
+          if info.namewhat ~= "" then
+            line[#line+1] = string_format(" in function '%s'", info.name)
+          else
+            if info.what == "main" then
+              line[#line+1] = " in main chunk"
+            elseif info.what == "C" or info.what == "tail" then
+              line[#line+1] = " ?"
+            else
+              line[#line+1] = string_format(" in function <%s:%d>", info.short_src, info.linedefined)
+            end
+          end
+          errobj.tb[#errobj.tb+1] = table_concat(line)
+        end
+        i = i + 1
+      end
+    end
+    return errobj
+  end
+
+  -- Calls func under xpcall().  Returns the error object produced by
+  -- my_traceback() when func raised, and nothing when it succeeded.
+  function mypcall(func)
+    orig_assert( is_function(func) )
+    local ok, errobj = xpcall(func, my_traceback)
+    if not ok then
+      return errobj
+    end
+  end
+  traceback_hide(mypcall)
+end
+
+
+-- Type check functions
+
+for _, typename in ipairs(typenames) do
+  lunit["is_"..typename] = function(x)
+    return type(x) == typename
+  end
+end
+
+local is_nil      = is_nil
+local is_boolean  = is_boolean
+local is_number   = is_number
+local is_string   = is_string
+local is_table    = is_table
+local is_function = is_function
+local is_thread   = is_thread
+local is_userdata = is_userdata
+
+
+-- Raises a failed-assertion error object.
+--   name       name of the assertion that failed
+--   usermsg    optional message supplied by the test author
+--   defaultmsg string.format template for the built-in message
+--   ...        arguments for defaultmsg
+-- The object is tagged with __failure__ so that my_traceback() and
+-- reporterrobj() can tell failures from ordinary errors.
+-- NOTE(review): my_traceback() reads a fixed stack level, so the call depth
+-- between a test, its assertion function and this error() call appears to
+-- matter; keep error() a direct, non-tail call here.
+local function failure(name, usermsg, defaultmsg, ...)
+  local errobj = {
+    type    = __failure__,
+    name    = name,
+    msg     = string_format(defaultmsg,...),
+    usermsg = usermsg
+  }
+  error(errobj, 0)
+end
+traceback_hide( failure )
+
+
+-- Renders a value for use inside an assertion message: strings are quoted,
+-- numbers/booleans/nil are printed as-is, anything else is bracketed.
+local function format_arg(arg)
+  local kind = type(arg)
+  if kind == "string" then
+    return "'"..arg.."'"
+  end
+  if kind == "number" or kind == "boolean" or kind == "nil" then
+    return tostring(arg)
+  end
+  return "["..tostring(arg).."]"
+end
+
+
+-- Decides whether the test called `name` should be run.
+--
+--   map   list of lunit patterns collected from "-t"/"--test", or nil when
+--         no filter was given (then every test is selected)
+--   name  name of the test function
+--
+-- Each pattern is translated to a Lua pattern ("?" becomes ".", "*" becomes
+-- ".*", the other magic characters are escaped) and matched unanchored
+-- against `name`.
+--
+-- The translation and the matching are done inline on purpose: the helpers
+-- lunitpat2luapat() and in_patternmap() are declared `local` further down in
+-- this file and are therefore not in scope here.  Calling them from this
+-- function looked the names up in the module environment, found nil and
+-- raised an error as soon as a filter was used.
+local selected
+do
+  local conv = {
+    ["^"] = "%^",
+    ["$"] = "%$",
+    ["("] = "%(",
+    [")"] = "%)",
+    ["%"] = "%%",
+    ["."] = "%.",
+    ["["] = "%[",
+    ["]"] = "%]",
+    ["+"] = "%+",
+    ["-"] = "%-",
+    ["?"] = ".",
+    ["*"] = ".*"
+  }
+
+  function selected(map, name)
+    if not map then
+      return true
+    end
+    for _, pat in ipairs(map) do
+      -- Parentheses drop the substitution count returned by gsub.
+      local luapat = (string_gsub(pat, "%W", conv))
+      if string_find(name, luapat) then
+        return true
+      end
+    end
+    return false
+  end
+end
+
+
+-- Counts one assertion and fails the running test unconditionally,
+-- reporting `msg` as the user message.
+function fail(msg)
+  local checked = stats.assertions
+  stats.assertions = checked + 1
+  failure( "fail", msg, "failure" )
+end
+traceback_hide( fail )
+
+
+-- Fails unless `assertion` is a true value (anything but nil/false).
+-- Returns `assertion` so the call can be used inline.
+function assert(assertion, msg)
+  stats.assertions = stats.assertions + 1
+  if assertion then
+    return assertion
+  end
+  failure( "assert", msg, "assertion failed" )
+  return assertion
+end
+traceback_hide( assert )
+
+
+-- Fails unless `actual` is exactly the boolean true.  Returns `actual`.
+function assert_true(actual, msg)
+  stats.assertions = stats.assertions + 1
+  if actual == true then
+    return actual
+  end
+  failure( "assert_true", msg, "true expected but was %s", format_arg(actual) )
+  return actual
+end
+traceback_hide( assert_true )
+
+
+-- Fails unless `actual` is exactly the boolean false.  Returns `actual`.
+function assert_false(actual, msg)
+  stats.assertions = stats.assertions + 1
+  if actual == false then
+    return actual
+  end
+  failure( "assert_false", msg, "false expected but was %s", format_arg(actual) )
+  return actual
+end
+traceback_hide( assert_false )
+
+
+-- Fails when `actual` differs from `expected` (compared with ~=).
+-- Returns `actual`.
+function assert_equal(expected, actual, msg)
+  stats.assertions = stats.assertions + 1
+  local differs = expected ~= actual
+  if differs then
+    failure( "assert_equal", msg, "expected %s but was %s", format_arg(expected), format_arg(actual) )
+  end
+  return actual
+end
+traceback_hide( assert_equal )
+
+
+-- Fails when `actual` equals `unexpected` (compared with ==).
+-- Returns `actual`.
+function assert_not_equal(unexpected, actual, msg)
+  stats.assertions = stats.assertions + 1
+  local same = unexpected == actual
+  if same then
+    failure( "assert_not_equal", msg, "%s not expected but was one", format_arg(unexpected) )
+  end
+  return actual
+end
+traceback_hide( assert_not_equal )
+
+
+-- Fails unless the string `actual` contains a match for the Lua pattern
+-- `pattern`.  Both arguments must be strings.  Returns `actual`.
+function assert_match(pattern, actual, msg)
+  local me = "assert_match"
+  stats.assertions = stats.assertions + 1
+  if not is_string(pattern) then
+    failure( me, msg, "expected a string as pattern but was %s", format_arg(pattern) )
+  end
+  if not is_string(actual) then
+    failure( me, msg, "expected a string to match pattern '%s' but was a %s", pattern, format_arg(actual) )
+  end
+  local found = string_find(actual, pattern)
+  if not found then
+    failure( me, msg, "expected '%s' to match pattern '%s' but doesn't", actual, pattern )
+  end
+  return actual
+end
+traceback_hide( assert_match )
+
+
+-- Fails when the string `actual` contains a match for the Lua pattern
+-- `pattern`.  Both arguments must be strings.  Returns `actual`.
+function assert_not_match(pattern, actual, msg)
+  local me = "assert_not_match"
+  stats.assertions = stats.assertions + 1
+  if not is_string(pattern) then
+    failure( me, msg, "expected a string as pattern but was %s", format_arg(pattern) )
+  end
+  if not is_string(actual) then
+    failure( me, msg, "expected a string to not match pattern '%s' but was %s", pattern, format_arg(actual) )
+  end
+  local found = string_find(actual, pattern)
+  if found then
+    failure( me, msg, "expected '%s' to not match pattern '%s' but it does", actual, pattern )
+  end
+  return actual
+end
+traceback_hide( assert_not_match )
+
+
+-- Fails unless calling `func` raises an error.
+-- Accepts assert_error(func) as well as assert_error(msg, func).
+function assert_error(msg, func)
+  stats.assertions = stats.assertions + 1
+  if func == nil then
+    -- Single-argument form: the only argument is the function.
+    msg, func = nil, msg
+  end
+  if not is_function(func) then
+    failure( "assert_error", msg, "expected a function as last argument but was %s", format_arg(func) )
+  end
+  local raised = not pcall(func)
+  if not raised then
+    failure( "assert_error", msg, "error expected but no error occurred" )
+  end
+end
+traceback_hide( assert_error )
+
+
+-- Fails unless calling `func` raises a string error that matches the Lua
+-- pattern `pattern`.
+-- Accepts assert_error_match(pattern, func) as well as
+-- assert_error_match(msg, pattern, func).
+function assert_error_match(msg, pattern, func)
+  local me = "assert_error_match"
+  stats.assertions = stats.assertions + 1
+  if func == nil then
+    -- Two-argument form: shift everything one position to the right.
+    msg, pattern, func = nil, msg, pattern
+  end
+  if not is_string(pattern) then
+    failure( me, msg, "expected the pattern as a string but was %s", format_arg(pattern) )
+  end
+  if not is_function(func) then
+    failure( me, msg, "expected a function as last argument but was %s", format_arg(func) )
+  end
+  local succeeded, raised = pcall(func)
+  if succeeded then
+    failure( me, msg, "error expected but no error occurred" )
+  end
+  if not is_string(raised) then
+    failure( me, msg, "error as string expected but was %s", format_arg(raised) )
+  end
+  if not string_find(raised, pattern) then
+    failure( me, msg, "expected error '%s' to match pattern '%s' but doesn't", raised, pattern )
+  end
+end
+traceback_hide( assert_error_match )
+
+
+-- Fails when calling `func` raises an error.
+-- Accepts assert_pass(func) as well as assert_pass(msg, func).
+function assert_pass(msg, func)
+  stats.assertions = stats.assertions + 1
+  if func == nil then
+    -- Single-argument form: the only argument is the function.
+    func, msg = msg, nil
+  end
+  if type(func) ~= "function" then
+    failure( "assert_pass", msg, "expected a function as last argument but was %s", format_arg(func) )
+  end
+  local ok, errmsg = pcall(func)
+  if not ok then
+    -- error() may be raised with any value (table, nil, ...).  Lua 5.1's
+    -- string.format rejects non-strings for "%s", which would replace the
+    -- assertion failure with an unrelated formatting error, so convert first.
+    failure( "assert_pass", msg, "no error expected but error was: '%s'", tostring(errmsg) )
+  end
+end
+traceback_hide( assert_pass )
+
+
+-- lunit.assert_typename functions
+
+-- Generates lunit.assert_nil, lunit.assert_boolean, ... one per entry of
+-- `typenames`.  Each fails unless type(actual) is that type and returns
+-- `actual` otherwise.
+for _, tname in ipairs(typenames) do
+  local fname = "assert_"..tname
+  local function check(actual, msg)
+    stats.assertions = stats.assertions + 1
+    if type(actual) ~= tname then
+      failure( fname, msg, "%s expected but was %s", tname, format_arg(actual) )
+    end
+    return actual
+  end
+  lunit[fname] = check
+  traceback_hide( check )
+end
+
+
+-- lunit.assert_not_typename functions
+
+-- Generates lunit.assert_not_nil, lunit.assert_not_boolean, ... one per
+-- entry of `typenames`.  Each fails when type(actual) is that type.
+for _, tname in ipairs(typenames) do
+  local fname = "assert_not_"..tname
+  local function check(actual, msg)
+    stats.assertions = stats.assertions + 1
+    if type(actual) == tname then
+      failure( fname, msg, tname.." not expected but was one" )
+    end
+  end
+  lunit[fname] = check
+  traceback_hide( check )
+end
+
+
+-- Replaces `stats` with a fresh table in which every counter is zero.
+function lunit.clearstats()
+  stats = { assertions = 0, passed = 0, failed = 0, errors = 0 }
+end
+
+
+-- report(event, ...) forwards an event to the current test runner;
+-- reporterrobj(...) classifies an error object and reports it.
+local report, reporterrobj
+do
+  -- Currently installed runner: a table of event callbacks, or nil.
+  local testrunner
+
+  -- Installs `newrunner` (a table, or nil to remove the runner) and returns
+  -- the runner that was installed before.
+  function lunit.setrunner(newrunner)
+    if not ( is_table(newrunner) or is_nil(newrunner) ) then
+      return error("lunit.setrunner: Invalid argument", 0)
+    end
+    local oldrunner = testrunner
+    testrunner = newrunner
+    return oldrunner
+  end
+
+  -- Loads a runner module with require(name) and installs it.
+  -- Returns the previously installed runner.
+  function lunit.loadrunner(name)
+    if not is_string(name) then
+      return error("lunit.loadrunner: Invalid argument", 0)
+    end
+    local ok, runner = pcall( require, name )
+    if not ok then
+      return error("lunit.loadrunner: Can't load test runner: "..runner, 0)
+    end
+    return setrunner(runner)
+  end
+
+  -- Returns the installed runner, or nil when there is none.
+  function lunit.getrunner()
+    return testrunner
+  end
+
+  -- Calls testrunner[event](...) when the runner defines that callback.
+  -- The call is protected and its outcome is ignored, so an error inside a
+  -- runner callback does not abort the test run.
+  function report(event, ...)
+    local f = testrunner and testrunner[event]
+    if is_function(f) then
+      pcall(f, ...)
+    end
+  end
+
+  -- Updates the failed/errors counter for `errobj` and reports it as a
+  -- "fail" or "err" event.  `context` is "setup", "test" or "teardown"; for
+  -- setup/teardown the name of that function is appended to the full name.
+  function reporterrobj(context, tcname, testname, errobj)
+    local fullname = tcname .. "." .. testname
+    if context == "setup" then
+      fullname = fullname .. ":" .. setupname(tcname, testname)
+    elseif context == "teardown" then
+      fullname = fullname .. ":" .. teardownname(tcname, testname)
+    end
+    if errobj.type == __failure__ then
+      stats.failed = stats.failed + 1
+      report("fail", fullname, errobj.where, errobj.msg, errobj.usermsg)
+    else
+      stats.errors = stats.errors + 1
+      report("err", fullname, errobj.msg, errobj.tb)
+    end
+  end
+end
+
+
+
+-- Iterator step that yields only the keys of `t` (the value returned by
+-- next() is dropped).
+local function key_iter(t, k)
+    local key = next(t, k)
+    return key
+end
+
+
+-- testcase(tcname) returns the registered testcase table with that name,
+-- or nil when no such testcase exists.
+local testcase
+do
+  -- Array with all registered testcases
+  local _testcases = {}
+
+  -- Marks a module as a testcase.
+  -- Applied over a module from module("xyz", lunit.testcase).
+  -- `m` must be a table with a string field _NAME; that name is the key
+  -- under which the testcase is registered.
+  function lunit.testcase(m)
+    orig_assert( is_table(m) )
+    --orig_assert( m._M == m )
+    orig_assert( is_string(m._NAME) )
+    --orig_assert( is_string(m._PACKAGE) )
+
+    -- Register the module as a testcase
+    _testcases[m._NAME] = m
+
+    -- Import lunit, fail, assert* and is_* function to the module/testcase
+    m.lunit = lunit
+    m.fail = lunit.fail
+    for funcname, func in pairs(lunit) do
+      if "assert" == string_sub(funcname, 1, 6) or "is_" == string_sub(funcname, 1, 3) then
+        m[funcname] = func
+      end
+    end
+  end
+  
+  -- Creates a new testcase table called `name`, registers it and returns
+  -- it.  When `seeall` is the string "seeall" the table falls back to the
+  -- global table for missing fields.
+  function lunit.module(name,seeall)
+    local m = {}
+    if seeall == "seeall" then
+      setmetatable(m, { __index = _G })
+    end
+    m._NAME = name
+    lunit.testcase(m)
+    return m
+  end
+
+  -- Iterator (testcasename) over all Testcases
+  function lunit.testcases()
+    -- Make a copy of testcases to prevent confusing the iterator when
+    -- new testcase are defined
+    local _testcases2 = {}
+    for k,v in pairs(_testcases) do
+        _testcases2[k] = true
+    end
+    return key_iter, _testcases2, nil
+  end
+
+  function testcase(tcname)
+    return _testcases[tcname]
+  end
+end
+
+
+do
+  -- Finds a function in a testcase case insensitive
+  -- `name` must already be lower case.  Returns the key as it is spelled
+  -- in the testcase, or nothing when there is no such function.
+  local function findfuncname(tcname, name)
+    for key, value in pairs(testcase(tcname)) do
+      if is_string(key) and is_function(value) and string_lower(key) == name then
+        return key
+      end
+    end
+  end
+
+  -- Returns the name of the testcase's setup function, if it has one.
+  function lunit.setupname(tcname)
+    return findfuncname(tcname, "setup")
+  end
+
+  -- Returns the name of the testcase's teardown function, if it has one.
+  function lunit.teardownname(tcname)
+    return findfuncname(tcname, "teardown")
+  end
+
+  -- Iterator over all test names in a testcase.
+  -- Have to collect the names first in case one of the test
+  -- functions creates a new global and throws off the iteration.
+  -- A function counts as a test when its lower-cased name starts or ends
+  -- with "test".
+  function lunit.tests(tcname)
+    local testnames = {}
+    for key, value in pairs(testcase(tcname)) do
+      if is_string(key) and is_function(value) then
+        local lfn = string_lower(key)
+        if string_sub(lfn, 1, 4) == "test" or string_sub(lfn, -4) == "test" then
+          testnames[key] = true
+        end
+      end
+    end
+    return key_iter, testnames, nil
+  end
+end
+
+
+
+
+-- Runs one test: setup (if any), the test function, teardown (if any).
+--   tcname    name of a registered testcase
+--   testname  name of a function inside that testcase
+-- Errors and failures are reported through the runner and counted in
+-- `stats`; stats.passed is incremented only when all three steps succeed.
+function lunit.runtest(tcname, testname)
+  orig_assert( is_string(tcname) )
+  orig_assert( is_string(testname) )
+
+  -- Fall back to the console runner when none has been installed yet.
+  if (not getrunner()) then
+    loadrunner("lunit.console")
+  end
+
+  -- Calls func (when given) through mypcall.  Reports the error object and
+  -- returns false when the call failed; returns true otherwise.
+  local function callit(context, func)
+    if func then
+      local err = mypcall(func)
+      if err then
+        reporterrobj(context, tcname, testname, err)
+        return false
+      end
+    end
+    return true
+  end
+  traceback_hide(callit)
+
+  report("run", tcname, testname)
+
+  local tc          = testcase(tcname)
+  local setup       = tc[setupname(tcname)]
+  local test        = tc[testname]
+  local teardown    = tc[teardownname(tcname)]
+
+  -- The test and the teardown are skipped when setup failed; the teardown
+  -- still runs when only the test itself failed.
+  local setup_ok    =              callit( "setup", setup )
+  local test_ok     = setup_ok and callit( "test", test )
+  local teardown_ok = setup_ok and callit( "teardown", teardown )
+
+  if setup_ok and test_ok and teardown_ok then
+    stats.passed = stats.passed + 1
+    report("pass", tcname, testname)
+  end
+end
+traceback_hide(runtest)
+
+
+
+-- Runs every test of every registered testcase whose name is accepted by
+-- selected(testpatterns, name), bracketed by "begin"/"done" runner events.
+-- Resets the statistics first and returns the resulting `stats` table.
+function lunit.run(testpatterns)
+  clearstats()
+  report("begin")
+  for tcname in lunit.testcases() do
+    for tname in lunit.tests(tcname) do
+      local wanted = selected(testpatterns, tname)
+      if wanted then
+        runtest(tcname, tname)
+      end
+    end
+  end
+  report("done")
+  return stats
+end
+traceback_hide(run)
+
+
+-- Resets the statistics and emits the "begin"/"done" runner events without
+-- running any test.  Returns the (all zero) `stats` table.
+function lunit.loadonly()
+  clearstats()
+  report("begin")
+  report("done")
+  return stats
+end
+
+
+
+
+
+
+
+
+
+-- lunitpat2luapat(str): translates a lunit test-name pattern into a Lua
+-- pattern.  "?" stands for any single character and "*" for any run of
+-- characters; the other characters listed in `conv` are escaped.
+local lunitpat2luapat
+do 
+  -- Replacement for each non-alphanumeric character that needs one.
+  local conv = {
+    ["^"] = "%^",
+    ["$"] = "%$",
+    ["("] = "%(",
+    [")"] = "%)",
+    ["%"] = "%%",
+    ["."] = "%.",
+    ["["] = "%[",
+    ["]"] = "%]",
+    ["+"] = "%+",
+    ["-"] = "%-",
+    ["?"] = ".",
+    ["*"] = ".*"
+  }
+  function lunitpat2luapat(str)
+    --return "^" .. string.gsub(str, "%W", conv) .. "$"
+    -- Above was very annoying, if I want to run all the tests having to do with
+    -- RSS, I want to be able to do "-t rss"   not "-t \*rss\*".
+    -- Note: the result is not anchored, and the substitution count returned
+    -- by gsub is passed on as a second return value.
+    return string_gsub(str, "%W", conv)
+  end
+end
+
+
+
+-- True when `name` is selected by `map`: either map[name] is exactly true,
+-- or one of the Lua patterns in the array part of `map` matches `name`.
+local function in_patternmap(map, name)
+  if map[name] == true then
+    return true
+  end
+  for _, luapat in ipairs(map) do
+    local hit = string_find(name, luapat)
+    if hit then
+      return true
+    end
+  end
+  return false
+end
+
+
+
+
+
+
+
+
+-- Called from 'lunit' shell script.
+
+-- Command line entry point.
+--   argv  array of command line arguments (defaults to an empty list)
+-- Options are processed left to right; every argument that is not an option
+-- is loaded as a test script.  Afterwards the loaded tests are run (or only
+-- loaded with --loadonly) and the `stats` table is returned.  With --help
+-- the usage text is printed and nothing is returned.
+function main(argv)
+  argv = argv or {}
+
+  -- FIXME: Error handling and error messages aren't nice.
+
+  -- Raises an error when the value following option `optname` is missing.
+  local function checkarg(optname, arg)
+    if not is_string(arg) then
+      return error("lunit.main: option "..optname..": argument missing.", 0)
+    end
+  end
+
+  -- Loads and executes the test script `filename`.
+  local function loadtestcase(filename)
+    if not is_string(filename) then
+      return error("lunit.main: invalid argument")
+    end
+    local chunk, err = loadfile(filename)
+    if err then
+      return error(err)
+    else
+      chunk()
+    end
+  end
+
+  local testpatterns = nil
+  local doloadonly = false
+
+  local i = 0
+  while i < #argv do
+    i = i + 1
+    local arg = argv[i]
+    if arg == "--loadonly" then
+      doloadonly = true
+    elseif arg == "--runner" or arg == "-r" then
+      local optname = arg; i = i + 1; arg = argv[i]
+      checkarg(optname, arg)
+      loadrunner(arg)
+    elseif arg == "--test" or arg == "-t" then
+      local optname = arg; i = i + 1; arg = argv[i]
+      checkarg(optname, arg)
+      testpatterns = testpatterns or {}
+      testpatterns[#testpatterns+1] = arg
+    elseif arg == "--help" or arg == "-h" then
+        -- The runner name is handed to require(); the default used by
+        -- runtest() is "lunit.console", so that is what the help shows.
+        print[[
+lunit 0.5
+Copyright (c) 2004-2009 Michael Roth <mroth@nessie.de>
+This program comes WITHOUT WARRANTY OF ANY KIND.
+
+Usage: lua test [OPTIONS] [--] scripts
+
+Options:
+
+  -r, --runner RUNNER         Testrunner to use, defaults to 'lunit.console'.
+  -t, --test PATTERN          Which tests to run, may contain * or ? wildcards.
+      --loadonly              Only load the tests.
+  -h, --help                  Print this help screen.
+
+Please report bugs to <mroth@nessie.de>.
+]]
+        return
+    elseif arg == "--" then
+      -- Everything after "--" is a script, even if it looks like an option.
+      while i < #argv do
+        i = i + 1; arg = argv[i]
+        loadtestcase(arg)
+      end
+    else
+      loadtestcase(arg)
+    end
+  end
+
+  if doloadonly then
+    return loadonly()
+  else
+    return run(testpatterns)
+  end
+end
+
+clearstats()
+
+return lunit
--- a/bindings/lua/lunitx/lunit/console.lua
+++ b/bindings/lua/lunitx/lunit/console.lua
@ -0,0 +1,156 @@
+
+--[[--------------------------------------------------------------------------
+
+    This file is part of lunit 0.5.
+
+    For Details about lunit look at: http://www.mroth.net/lunit/
+
+    Author: Michael Roth <mroth@nessie.de>
+
+    Copyright (c) 2006-2008 Michael Roth <mroth@nessie.de>
+
+    Permission is hereby granted, free of charge, to any person 
+    obtaining a copy of this software and associated documentation
+    files (the "Software"), to deal in the Software without restriction,
+    including without limitation the rights to use, copy, modify, merge,
+    publish, distribute, sublicense, and/or sell copies of the Software,
+    and to permit persons to whom the Software is furnished to do so,
+    subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be 
+    included in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+--]]--------------------------------------------------------------------------
+
+
+
+--[[
+
+      begin()
+        run(testcasename, testname)
+          err(fullname, message, traceback)
+          fail(fullname, where, message, usermessage)
+          pass(testcasename, testname)
+      done()
+
+      Fullname:
+        testcase.testname
+        testcase.testname:setupname
+        testcase.testname:teardownname
+
+--]]
+
+
+lunit = require "lunit"
+
+local lunit_console
+
+if _VERSION >= 'Lua 5.2' then 
+
+    lunit_console = setmetatable({},{__index = _ENV})
+    _ENV = lunit_console
+    
+else
+
+    module( "lunit-console", package.seeall )
+    lunit_console = _M
+    
+end
+
+
+
+-- printf-style helper: formats the arguments and writes the text through
+-- io.write without adding a newline.
+local function printformat(format, ...)
+  local text = string.format(format, ...)
+  io.write(text)
+end
+
+
+-- Number of status characters written on the current output line.
+local columns_printed = 0
+
+-- Writes one status character (".", "F", "E").  Lines are indented by four
+-- spaces and wrapped after 60 characters; output is flushed every time.
+local function writestatus(char)
+  local lead
+  if columns_printed == 0 then
+    lead = "    "
+  elseif columns_printed == 60 then
+    lead = "\n    "
+    columns_printed = 0
+  end
+  if lead then
+    io.write(lead)
+  end
+  io.write(char)
+  io.flush()
+  columns_printed = columns_printed + 1
+end
+
+
+local msgs = {}
+
+
+-- Runner event "begin": resets the collected messages and prints how many
+-- tests in how many testcases have been loaded.
+function begin()
+  local total_tc = 0
+  local total_tests = 0
+  
+  msgs = {}   -- start with an empty message list
+
+  for tcname in lunit.testcases() do
+    total_tc = total_tc + 1
+    for _ in lunit.tests(tcname) do
+      total_tests = total_tests + 1
+    end
+  end
+
+  printformat("Loaded testsuite with %d tests in %d testcases.\n\n", total_tests, total_tc)
+end
+
+
+function run(testcasename, testname)
+  -- NOP
+end
+
+
+function err(fullname, message, traceback)
+  writestatus("E")
+  msgs[#msgs+1] = "Error! ("..fullname.."):\n"..message.."\n\t"..table.concat(traceback, "\n\t") .. "\n"
+end
+
+
+-- Runner event "fail": prints an "F" and stores the failure text (built-in
+-- message plus the optional user message) for the final summary.
+function fail(fullname, where, message, usermessage)
+  writestatus("F")
+  local parts = {
+    "Failure ("..fullname.."):\n",
+    where..": "..message.."\n"
+  }
+  if usermessage then
+    parts[#parts+1] = where..": "..usermessage.."\n"
+  end
+  msgs[#msgs+1] = table.concat(parts)
+end
+
+
+function pass(testcasename, testname)
+  writestatus(".")
+end
+
+
+
+-- Runner event "done": prints the number of assertions, every collected
+-- error/failure message, and the final passed/failed/errors line.
+function done()
+  local totals = lunit.stats
+  printformat("\n\n%d Assertions checked.\n", totals.assertions )
+  print()
+
+  for index, text in ipairs(msgs) do
+    printformat( "%3d) %s\n", index, text )
+  end
+
+  printformat("Testsuite finished (%d passed, %d failed, %d errors).\n",
+      totals.passed, totals.failed, totals.errors )
+end
+
+
+return lunit_console
+
+
--- a/bindings/lua/lunitx/lunitx.lua
+++ b/bindings/lua/lunitx/lunitx.lua
@ -0,0 +1,21 @@
+-- Requiring this module registers an atexit handler that runs lunit.main
+-- on the script's command line arguments, and returns the lunit module.
+local atexit = require "atexit"
+local lunit = require "lunit"
+
+--for k,v in pairs(debug.getinfo(1,"S")) do print(k,v) end
+-- autonaming
+-- module("bcrc-test", lunit.testcase, package.seeall)
+
+atexit(function()
+    -- Run the tests; on error emsg holds the traceback text.
+    local _, emsg = xpcall(function()
+        lunit.main(arg)
+    end, debug.traceback)
+    if emsg then
+        print(emsg)
+        os.exit(1)
+    end
+    -- Exit with a non-zero status when any test failed or raised an error.
+    if lunit.stats.failed > 0 or lunit.stats.errors > 0 then
+      os.exit(1)
+    end
+end)
+
+return lunit
--- a/bindings/lua/table.c
+++ b/bindings/lua/table.c
@ -0,0 +1,167 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2012 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * Lua extension that provides access to upb_table.  This is an internal-only
+ * interface and exists for the sole purpose of writing a C code generator in
+ * Lua that can dump a upb_table as static C initializers.  This lets us use
+ * Lua for convenient string manipulation while saving us from re-implementing
+ * the upb_table hash function and hash table layout / collision strategy in
+ * Lua.
+ *
+ * Since this is used only as part of the toolchain (and not part of the
+ * runtime) we do not hold this module to the same stringent requirements as
+ * the main Lua modules (for example that misbehaving Lua programs cannot
+ * crash the interpreter).
+ */
+
+#include <float.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include "lauxlib.h"
+#include "bindings/lua/upb.h"
+#include "upb/def.h"
+
+// Sets t[key] = val, where t is the Lua table at stack index "tab".  "tab" is
+// given as it was before the call: this function pushes the number itself and
+// compensates for that push when "tab" is a stack-relative index.
+static void lupbtable_setnum(lua_State *L, int tab, const char *key,
+                             lua_Number val) {
+  lua_pushnumber(L, val);
+  // Only relative (negative, non-pseudo) indexes move when a value is pushed;
+  // absolute indexes and pseudo-indexes such as LUA_REGISTRYINDEX must be
+  // passed through unchanged.
+  if (tab < 0 && tab > LUA_REGISTRYINDEX) tab--;
+  lua_setfield(L, tab, key);
+}
+
+// Pushes onto the Lua stack the value held in "val", interpreted according to
+// "type": int32 as a number, ptr as a def wrapper (with a NULL owner), cstr as
+// a string.  Any other type raises a Lua error.
+static void lupbtable_pushval(lua_State *L, upb_value val, upb_ctype_t type) {
+  if (type == UPB_CTYPE_INT32) {
+    lua_pushnumber(L, upb_value_getint32(val));
+  } else if (type == UPB_CTYPE_PTR) {
+    lupb_def_pushwrapper(L, upb_value_getptr(val), NULL);
+  } else if (type == UPB_CTYPE_CSTR) {
+    lua_pushstring(L, upb_value_getcstr(val));
+  } else {
+    luaL_error(L, "Unexpected type: %d", type);
+  }
+}
+
+// Sets a few fields common to both hash table entries and arrays.
+//
+// Expects the Lua table being filled in at the top of the stack and leaves the
+// stack unchanged.  Stores "type" under the key "valtype" and "ptr" (as light
+// userdata) under the key "ptr".
+static void lupbtable_setmetafields(lua_State *L, int type, const void *ptr) {
+  // We tack this onto every entry so we know it even if the entries
+  // don't stay with the table.
+  lua_pushnumber(L, type);
+  lua_setfield(L, -2, "valtype");
+
+  // Set this to facilitate linking.
+  lua_pushlightuserdata(L, (void*)ptr);
+  lua_setfield(L, -2, "ptr");
+}
+
+// Pushes a new Lua table describing the hash table entry "e".
+//
+// For a non-empty entry the table gets "key" (a number when "inttab" is true,
+// otherwise a string) and "value" (converted according to "type").  Every
+// entry, empty or not, gets "next" (the entry's next pointer as light
+// userdata) plus the common "valtype"/"ptr" fields.
+static void lupbtable_pushent(lua_State *L, const upb_tabent *e,
+                              bool inttab, int type) {
+  lua_newtable(L);
+  if (!upb_tabent_isempty(e)) {
+    if (inttab) {
+      lua_pushnumber(L, e->key.num);
+    } else {
+      lua_pushstring(L, e->key.str);
+    }
+    lua_setfield(L, -2, "key");
+    lupbtable_pushval(L, e->val, type);
+    lua_setfield(L, -2, "value");
+  }
+  lua_pushlightuserdata(L, (void*)e->next);
+  lua_setfield(L, -2, "next");
+  lupbtable_setmetafields(L, type, e);
+}
+
+// Dumps the shared part of upb_table into a Lua table.
+//
+// Pushes one new Lua table with the numeric fields "count", "mask", "type" and
+// "size_lg2", plus "entries": a 1-based Lua array holding one entry table per
+// slot of t->entries (slot i is stored at index i + 1).  "inttab" selects
+// whether entry keys are dumped as numbers or as strings.
+static void lupbtable_pushtable(lua_State *L, const upb_table *t, bool inttab) {
+  lua_newtable(L);
+  lupbtable_setnum(L, -1, "count", t->count);
+  lupbtable_setnum(L, -1, "mask",  t->mask);
+  lupbtable_setnum(L, -1, "type",  t->type);
+  lupbtable_setnum(L, -1, "size_lg2",  t->size_lg2);
+
+  lua_newtable(L);
+  for (int i = 0; i < upb_table_size(t); i++) {
+    lupbtable_pushent(L, &t->entries[i], inttab, t->type);
+    lua_rawseti(L, -2, i + 1);
+  }
+  lua_setfield(L, -2, "entries");
+}
+
+// Dumps a upb_inttable to a Lua table.
+//
+// On top of the shared table fields this adds "array_size", "array_count" and
+// "array": a 1-based Lua array with one table per slot of t->array.  A slot
+// for which upb_arrhas() is true carries its value under the key "val" (note:
+// hash entries use the key "value" instead); every slot carries the common
+// "valtype"/"ptr" fields.
+static void lupbtable_pushinttable(lua_State *L, const upb_inttable *t) {
+  lupbtable_pushtable(L, &t->t, true);
+  lupbtable_setnum(L, -1, "array_size", t->array_size);
+  lupbtable_setnum(L, -1, "array_count", t->array_count);
+
+  lua_newtable(L);
+  for (int i = 0; i < t->array_size; i++) {
+    lua_newtable(L);
+    if (upb_arrhas(t->array[i])) {
+      lupbtable_pushval(L, t->array[i], t->t.type);
+      lua_setfield(L, -2, "val");
+    }
+    lupbtable_setmetafields(L, t->t.type, &t->array[i]);
+    lua_rawseti(L, -2, i + 1);
+  }
+  lua_setfield(L, -2, "array");
+}
+
+// Dumps a upb_strtable to a Lua table.  A string table has no array part, so
+// this is just the shared dump with string keys.
+static void lupbtable_pushstrtable(lua_State *L, const upb_strtable *t) {
+  lupbtable_pushtable(L, &t->t, false);
+}
+
+// Lua: msgdef_itof(msgdef) -> table.  Dumps the msgdef's "itof" int table.
+// Argument 1 is validated by lupb_msgdef_check().
+static int lupbtable_msgdef_itof(lua_State *L) {
+  const upb_msgdef *m = lupb_msgdef_check(L, 1);
+  lupbtable_pushinttable(L, &m->itof);
+  return 1;
+}
+
+// Lua: msgdef_ntof(msgdef) -> table.  Dumps the msgdef's "ntof" string table.
+// Argument 1 is validated by lupb_msgdef_check().
+static int lupbtable_msgdef_ntof(lua_State *L) {
+  const upb_msgdef *m = lupb_msgdef_check(L, 1);
+  lupbtable_pushstrtable(L, &m->ntof);
+  return 1;
+}
+
+// Lua: enumdef_iton(enumdef) -> table.  Dumps the enumdef's "iton" int table.
+// Argument 1 is validated by lupb_enumdef_check().
+static int lupbtable_enumdef_iton(lua_State *L) {
+  const upb_enumdef *e = lupb_enumdef_check(L, 1);
+  lupbtable_pushinttable(L, &e->iton);
+  return 1;
+}
+
+// Lua: enumdef_ntoi(enumdef) -> table.  Dumps the enumdef's "ntoi" string
+// table.  Argument 1 is validated by lupb_enumdef_check().
+static int lupbtable_enumdef_ntoi(lua_State *L) {
+  const upb_enumdef *e = lupb_enumdef_check(L, 1);
+  lupbtable_pushstrtable(L, &e->ntoi);
+  return 1;
+}
+
+// Stores the integer "i" under the key "field" in the table at the top of the
+// Lua stack; the stack is left unchanged.
+static void lupbtable_setfieldi(lua_State *L, const char *field, int i) {
+  lua_pushnumber(L, i);
+  lua_setfield(L, -2, field);
+}
+
+// Functions exported by the module table, terminated by the {NULL, NULL}
+// sentinel that the luaL registration functions expect.
+static const struct luaL_Reg lupbtable_toplevel_m[] = {
+  {"msgdef_itof", lupbtable_msgdef_itof},
+  {"msgdef_ntof", lupbtable_msgdef_ntof},
+  {"enumdef_iton", lupbtable_enumdef_iton},
+  {"enumdef_ntoi", lupbtable_enumdef_ntoi},
+  {NULL, NULL}
+};
+
+// Module entry point.  Creates the module table with the functions above and
+// adds the constants CTYPE_PTR and CTYPE_INT32 and a "NULL" field holding a
+// NULL light userdata (presumably for comparing against dumped "next"/"ptr"
+// values).
+int luaopen_upbtable(lua_State *L) {
+  lupb_newlib(L, "upb.table", lupbtable_toplevel_m);
+
+  // We define these here because they are not public (at least at the moment).
+  lupbtable_setfieldi(L, "CTYPE_PTR", UPB_CTYPE_PTR);
+  lupbtable_setfieldi(L, "CTYPE_INT32", UPB_CTYPE_INT32);
+
+  lua_pushlightuserdata(L, NULL);
+  lua_setfield(L, -2, "NULL");
+
+  return 1;  // Return a single Lua value, the package table created above.
+}
--- a/bindings/lua/test.lua
+++ b/bindings/lua/test.lua
@ -1,109 +1,262 @@

-require "upb"
+local upb = require "upb"
+local lunit = require "lunitx"

-symtab = upb.SymbolTable{
-  upb.MessageDef{fqname="A", fields={
-    upb.FieldDef{name="a", type=upb.TYPE_INT32, number=1},
-    upb.FieldDef{name="b", type=upb.TYPE_DOUBLE, number=2}}
+if _VERSION >= 'Lua 5.2' then
+  _ENV = lunit.module("testupb", "seeall")
+else
+  module("testupb", lunit.testcase, package.seeall)
+end
+
+function test_fielddef()
+  local f = upb.FieldDef()
+  assert_false(f:is_frozen())
+  assert_nil(f:number())
+  assert_nil(f:name())
+  assert_equal(upb.LABEL_OPTIONAL, f:label())
+
+  f:set_name("foo_field")
+  f:set_number(3)
+  f:set_label(upb.LABEL_REPEATED)
+  f:set_type(upb.TYPE_FLOAT)
+
+  assert_equal("foo_field", f:name())
+  assert_equal(3, f:number())
+  assert_equal(upb.LABEL_REPEATED, f:label())
+  assert_equal(upb.TYPE_FLOAT, f:type())
+
+  local f2 = upb.FieldDef{
+    name = "foo", number = 5, type = upb.TYPE_DOUBLE, label = upb.LABEL_REQUIRED
  }
-}
-
-symtab = upb.SymbolTable{
-  upb.MessageDef{fqname="A", fields={
-    upb.FieldDef{name="a", type=upb.TYPE_INT32, number=1},
-    upb.FieldDef{name="b", type=upb.TYPE_DOUBLE, number=2}}
-  },
-  upb.MessageDef{fqname="B"}
-}
-A, B, C = symtab:lookup("A", "B")
-print(A)
-print(B)
-print(C)
-
-a = A()
-a2 = upb.Message(A)
-print("YO!  a.a=" .. tostring(a.a) .. ", a2.a=" .. tostring(a2.a))
-a.a = 2
-a2.a = 3
-print("YO!  a.a=" .. tostring(a.a) .. ", a2.a=" .. tostring(a2.a))
-
-A = symtab:lookup("A")
-if not A then
-  error("Could not find A")
+
+  assert_equal("foo", f2:name())
+  assert_equal(5, f2:number())
+  assert_equal(upb.TYPE_DOUBLE, f2:type())
+  assert_equal(upb.LABEL_REQUIRED, f2:label())
 end

-f = io.open("../../upb/descriptor.pb")
-if not f then
-  error("Couldn't open descriptor.pb, try running 'make descriptorgen'")
+function test_enumdef()
+  local e = upb.EnumDef()
+  assert_equal(0, #e)
+  assert_nil(e:value(5))
+  assert_nil(e:value("NONEXISTENT_NAME"))
+
+  for name, value in e:values() do
+    fail()
+  end
+
+  e:add("VAL1", 1)
+  e:add("VAL2", 2)
+
+  local values = {}
+  for name, value in e:values() do
+    values[name] = value
+  end
+
+  assert_equal(1, values["VAL1"])
+  assert_equal(2, values["VAL2"])
+
+  local e2 = upb.EnumDef{
+    values = {
+      {"FOO", 1},
+      {"BAR", 77},
+    }
+  }
+
+  assert_equal(1, e2:value("FOO"))
+  assert_equal(77, e2:value("BAR"))
+  assert_equal("FOO", e2:value(1))
+  assert_equal("BAR", e2:value(77))
 end
-symtab:parsedesc(f:read("*all"))
-symtab:load_descriptor()
-symtab:load_descriptor_file()

-upb.pb.load_descriptor(f:read("*all"))
+-- A freshly constructed MessageDef is anonymous, unfrozen and has no fields.
+-- After upb.freeze() the same lookups and iteration still report no fields;
+-- only is_frozen() changes.
+function test_empty_msgdef()
+  local md = upb.MessageDef()
+  assert_nil(md:full_name())  -- Def without name is anonymous.
+  assert_false(md:is_frozen())
+  assert_equal(0, #md)
+  assert_nil(md:field("nonexistent_field"))
+  assert_nil(md:field(3))
+  -- Iterating an empty def must not yield anything.
+  for field in md:fields() do
+    fail()
+  end
+
+  upb.freeze(md)
+  assert_true(md:is_frozen())
+  assert_equal(0, #md)
+  assert_nil(md:field("nonexistent_field"))
+  assert_nil(md:field(3))
+  for field in md:fields() do
+    fail()
+  end
+end
+
+function test_msgdef_constructor()
+  local f1 = upb.FieldDef{name = "field1", number = 7, type = upb.TYPE_INT32}
+  local f2 = upb.FieldDef{name = "field2", number = 8, type = upb.TYPE_INT32}
+  local md = upb.MessageDef{
+    full_name = "TestMessage",
+    fields = {f1, f2}
+  }
+  assert_equal("TestMessage", md:full_name())
+  assert_false(md:is_frozen())
+  assert_equal(2, #md)
+  assert_equal(f1, md:field("field1"))
+  assert_equal(f2, md:field("field2"))
+  assert_equal(f1, md:field(7))
+  assert_equal(f2, md:field(8))
+  local count = 0
+  local found = {}
+  for field in md:fields() do
+    count = count + 1
+    found[field] = true
+  end
+  assert_equal(2, count)
+  assert_true(found[f1])
+  assert_true(found[f2])

-upb.pb.load_descriptor_file("../../src/descriptor.pb", symtab)
+  upb.freeze(md)
+end
+
+-- Builds a MessageDef through its setters instead of the constructor table:
+-- set_full_name() is visible through full_name(), and a field added with
+-- add{...} is counted by # and can be looked up by name.
+function test_msgdef_setters()
+  local md = upb.MessageDef()
+  md:set_full_name("Message1")
+  assert_equal("Message1", md:full_name())
+  local f = upb.FieldDef{name = "field1", number = 3, type = upb.TYPE_DOUBLE}
+  md:add{f}
+  assert_equal(1, #md)
+  assert_equal(f, md:field("field1"))
+end
+
+-- Exercises the error paths of MessageDef: unknown initializer keys, duplicate
+-- field numbers and names, names with embedded NULs, mutation after freezing,
+-- and freezing a def whose message-typed field refers to an unfrozen subdef.
+function test_msgdef_errors()
+  assert_error(function() upb.MessageDef{bad_initializer_key = 5} end)
+  local md = upb.MessageDef()
+  assert_error(function()
+    -- Duplicate field number.
+    upb.MessageDef{
+      fields = {
+        upb.FieldDef{name = "field1", number = 1, type = upb.TYPE_INT32},
+        upb.FieldDef{name = "field2", number = 1, type = upb.TYPE_INT32}
+      }
+    }
+  end)
+  assert_error(function()
+    -- Duplicate field name.
+    upb.MessageDef{
+      fields = {
+        upb.FieldDef{name = "field1", number = 1, type = upb.TYPE_INT32},
+        upb.FieldDef{name = "field1", number = 2, type = upb.TYPE_INT32}
+      }
+    }
+  end)
+
+  -- attempt to set a name with embedded NULLs.
+  assert_error_match("names cannot have embedded NULLs", function()
+    md:set_full_name("abc\0def")
+  end)
+
+  upb.freeze(md)
+  -- Attempt to mutate frozen MessageDef.
+  -- TODO(haberman): better error message and test for message.
+  assert_error(function()
+    md:add{upb.FieldDef{name = "field1", number = 1, type = upb.TYPE_INT32}}
+  end)
+  assert_error(function()
+    md:set_full_name("abc")
+  end)
+
+  -- Attempt to freeze a msgdef without freezing its subdef.
+  assert_error_match("is not frozen or being frozen", function()
+    -- Keep the subdef local so it does not leak into the module environment.
+    local m1 = upb.MessageDef()
+    upb.freeze(
+      upb.MessageDef{
+        fields = {
+          upb.FieldDef{name = "f1", number = 1, type = upb.TYPE_MESSAGE,
+                       subdef = m1}
+        }
+      }
+    )
+  end)
+end
+
+-- Checks SymbolTable construction and incremental addition: an empty table
+-- has no defs; defs passed to the constructor can be looked up by name, come
+-- back frozen, and have symbolic subdef names resolved; defs added later may
+-- refer directly to defs already in the table.
+function test_symtab()
+  local empty = upb.SymbolTable()
+  assert_equal(0, #empty:getdefs(upb.DEF_ANY))
+
+  local symtab = upb.SymbolTable{
+    upb.MessageDef{full_name = "TestMessage"},
+    upb.MessageDef{full_name = "ContainingMessage", fields = {
+      upb.FieldDef{name = "field1", number = 1, type = upb.TYPE_INT32},
+      upb.FieldDef{name = "field2", number = 2, type = upb.TYPE_MESSAGE,
+                   subdef_name = ".TestMessage"}
+      }
+    }
+  }
+
+  local msgdef1 = symtab:lookup("TestMessage")
+  local msgdef2 = symtab:lookup("ContainingMessage")
+  assert_not_nil(msgdef1)
+  assert_not_nil(msgdef2)
+  -- The ".TestMessage" subdef_name must have been resolved to the real def.
+  assert_equal(msgdef1, msgdef2:field("field2"):subdef())
+  assert_true(msgdef1:is_frozen())
+  assert_true(msgdef2:is_frozen())
+
+  -- Add a def whose field points at a def that is already in the table.
+  symtab:add{
+    upb.MessageDef{full_name = "ContainingMessage2", fields = {
+      upb.FieldDef{name = "field5", number = 5, type = upb.TYPE_MESSAGE,
+                   subdef = msgdef2}
+      }
+    }
+  }
+
+  local msgdef3 = symtab:lookup("ContainingMessage2")
+  assert_not_nil(msgdef3)
+  assert_equal(msgdef3:field("field5"):subdef(), msgdef2)
+end

-f = io.open("../../benchmarks/google_messages.proto.pb")
-if not f then
-  error("Couldn't open google_messages.proto.pb, try running 'make benchmarks'")
+-- Lua 5.1 and 5.2 have slightly different semantics for how a finalizer
+-- can be defined in Lua.
+if _VERSION >= 'Lua 5.2' then
+  function defer(fn)
+    setmetatable({}, { __gc = fn })
+  end
+else
+  function defer(fn)
+    getmetatable(newproxy(true)).__gc = fn
+  end
 end
-symtab:parsedesc(f:read("*all"))

-for _, def in ipairs(symtab:getdefs(-1)) do
-  print(def:name())
+function test_finalizer()
+  -- Tests that we correctly handle a call into an already-finalized object.
+  -- Collectible objects are finalized in the opposite order of creation.
+  do
+    local t = {}
+    defer(function()
+      assert_error_match("called into dead def", function()
+        -- Generic def call.
+        t[1]:full_name()
+      end)
+      assert_error_match("called into dead msgdef", function()
+        -- Specific msgdef call.
+        t[1]:add()
+      end)
+      assert_error_match("called into dead enumdef", function()
+        t[2]:values()
+      end)
+      assert_error_match("called into dead fielddef", function()
+        t[3]:number()
+      end)
+      assert_error_match("called into dead symtab",
+        function() t[4]:lookup()
+      end)
+    end)
+    t = {
+      upb.MessageDef(),
+      upb.EnumDef(),
+      upb.FieldDef(),
+      upb.SymbolTable(),
+    }
+  end
+  collectgarbage()
 end

-SpeedMessage1 = symtab:lookup("benchmarks.SpeedMessage1")
-SpeedMessage2 = symtab:lookup("benchmarks.SpeedMessage2")
-print(SpeedMessage1:name())
-
-msg = MyType()
-msg:Decode(str)
-
-msg:DecodeJSON(str)
-
-msg = upb.pb.decode(str, MyType)
-str = upb.pb.encode(msg)
-
-msg = upb.pb.decode_text(str, MyType)
-str = upb.pb.encode_text(msg)
-
-upb.clear(msg)
-upb.msgdef(msg)
-upb.has(msg, "foo_bar")
-
-msg = upb.json.decode(str, MyType)
-
-msg = upb.pb.DecodeText(str)
-msg = upb.pb.EncodeText(msg)
-upb.
-
-upb.pb.decode_into(msg, str)
-
-str = upb.json.Encode(msg)
-upb.json.DecodeInto(msg, str)
-f = assert(io.open("../../benchmarks/google_message1.dat"))
-msg:Parse(f:read("*all"))
-print(msg:ToText())
-print(upb.json.encode(msg))
-
-msg = SpeedMessage2()
-f = assert(io.open("../../benchmarks/google_message2.dat"))
-msg:Parse(f:read("*all"))
-print(msg:ToText())
--msg:Serialize()
--msg:FromText(str)
-- print(msg.field129)
-- print(msg.field271)
--print(msg.field15.field15)
--msg.field15.field15 = "my override"
--print(msg.field15.field15)
-- print(msg.field1)
-- print(msg.field1)
-- msg.field1 = "YEAH BABY!"
-- print(msg.field1)
-- print(msg.field129)
-- msg.field129 = 5
-- print(msg.field129)
--]]
+lunit.main()
--- a/bindings/lua/upb.c
+++ b/bindings/lua/upb.c
--- a/bindings/lua/upb.h
+++ b/bindings/lua/upb.h
@ -0,0 +1,45 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2012 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * Shared definitions for upb Lua modules.
+ */
+
+#ifndef UPB_LUA_UPB_H_
+#define UPB_LUA_UPB_H_
+
+// lauxlib.h also pulls in lua.h.  Both are needed by this header itself: for
+// lua_State, for the luaL_* functions the macros below expand to, and for
+// LUA_VERSION_NUM, which would otherwise be undefined in the version check.
+#include "lauxlib.h"
+#include "upb/def.h"
+
+// Lua 5.1/5.2 compatibility code.
+#if LUA_VERSION_NUM == 501
+
+// Give Lua 5.1 the 5.2 spellings used by the upb modules.
+#define lua_rawlen lua_objlen
+#define lupb_newlib(L, name, l) luaL_register(L, name, l)
+#define lupb_setfuncs(L, l) luaL_register(L, NULL, l)
+#define LUPB_OPENFUNC(mod) luaopen_ ## mod ## upb5_1
+
+// Not part of the Lua 5.1 auxiliary library; declared here (presumably
+// implemented by the upb Lua module).
+void *luaL_testudata(lua_State *L, int ud, const char *tname);
+
+#elif LUA_VERSION_NUM == 502
+
+// Lua 5.2 modules are not expected to set a global variable, so "name" is
+// unused.
+#define lupb_newlib(L, name, l) luaL_newlib(L, l)
+#define lupb_setfuncs(L, l) luaL_setfuncs(L, l, 0)
+// Not part of the Lua 5.2 auxiliary library; declared here (presumably
+// implemented by the upb Lua module).
+int luaL_typerror(lua_State *L, int narg, const char *tname);
+#define LUPB_OPENFUNC(mod) luaopen_ ## mod ## upb5_2
+
+#else
+#error Only Lua 5.1 and 5.2 are supported
+#endif
+
+// Helpers shared between the upb Lua modules.
+const upb_msgdef *lupb_msgdef_check(lua_State *L, int narg);
+const upb_enumdef *lupb_enumdef_check(lua_State *L, int narg);
+const char *lupb_checkname(lua_State *L, int narg);
+bool lupb_def_pushwrapper(lua_State *L, const upb_def *def, const void *owner);
+void lupb_def_pushnewrapper(lua_State *L, const upb_def *def,
+                            const void *owner);
+
+#endif  // UPB_LUA_UPB_H_
--- a/dynasm/COPYRIGHT
+++ b/dynasm/COPYRIGHT
@ -1,5 +1,3 @@
-DynASM is taken from LuaJIT 2, which carries the following license statement:
-
 ===============================================================================
 LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/

--- a/tests/test_cpp.cc
+++ b/tests/test_cpp.cc
@ -8,20 +8,20 @@
 */

 #include <stdio.h>
+#include <string.h>
 #include <iostream>
-#include "upb/bytestream.hpp"
-#include "upb/def.hpp"
-#include "upb/handlers.hpp"
-#include "upb/upb.hpp"
-#include "upb/pb/decoder.hpp"
-#include "upb/pb/glue.hpp"
+#include "upb/bytestream.h"
+#include "upb/def.h"
+#include "upb/handlers.h"
+#include "upb/pb/glue.h"
 #include "upb_test.h"
+#include "upb/upb.h"

 static void TestSymbolTable(const char *descriptor_file) {
  upb::SymbolTable *s = upb::SymbolTable::New(&s);
  upb::Status status;
  if (!upb::LoadDescriptorFileIntoSymtab(s, descriptor_file, &status)) {
-    std::cerr << "Couldn't load descriptor: " << status;
+    std::cerr << "Couldn't load descriptor: " << status.GetString();
    exit(1);
  }
  const upb::MessageDef *md = s->LookupMessage("A", &md);
@ -41,7 +41,9 @@ static void TestByteStream() {
  free(str);
 }

-int main(int argc, char *argv[]) {
+extern "C" {
+
+int run_tests(int argc, char *argv[]) {
  if (argc < 2) {
    fprintf(stderr, "Usage: test_cpp <descriptor file>\n");
    return 1;
@ -50,3 +52,5 @@ int main(int argc, char *argv[]) {
  TestByteStream();
  return 0;
 }
+
+}
--- a/tests/test_decoder.cc
+++ b/tests/test_decoder.cc
@ -7,6 +7,7 @@
 * input, with buffer breaks in arbitrary places.
 *
 * Tests to add:
+ * - string/bytes
 * - unknown field handler called appropriately
 * - unknown fields can be inserted in random places
 * - fuzzing of valid input
@ -35,6 +36,9 @@
 #include "upb/pb/varint.h"
 #include "upb/upb.h"
 #include "upb_test.h"
+#include "third_party/upb/tests/test_decoder_schema.upb.h"
+
+uint32_t filter_hash = 0;

 // Copied from decoder.c, since this is not a public interface.
 typedef struct {
@ -186,66 +190,78 @@ void indent(void *depth) {
  indentbuf(&output, *(int*)depth);
 }

-#define VALUE_HANDLER(member, fmt) \
-  upb_flow_t value_ ## member(void *closure, upb_value fval, upb_value val) { \
-    indent(closure);                                                          \
-    output.appendf("%" PRIu32 ":%" fmt "\n",                                  \
-                   upb_value_getuint32(fval), upb_value_get ## member(val));  \
-    return UPB_CONTINUE;                                                      \
+#define NUMERIC_VALUE_HANDLER(member, ctype, fmt) \
+  bool value_ ## member(void *closure, void *fval, ctype val) {       \
+    indent(closure);                                                  \
+    uint32_t *num = static_cast<uint32_t*>(fval);                     \
+    output.appendf("%" PRIu32 ":%" fmt "\n", *num, val);              \
+    return true;                                                      \
  }

-VALUE_HANDLER(uint32, PRIu32)
-VALUE_HANDLER(uint64, PRIu64)
-VALUE_HANDLER(int32, PRId32)
-VALUE_HANDLER(int64, PRId64)
-VALUE_HANDLER(float, "g")
-VALUE_HANDLER(double, "g")
+NUMERIC_VALUE_HANDLER(uint32, uint32_t, PRIu32)
+NUMERIC_VALUE_HANDLER(uint64, uint64_t, PRIu64)
+NUMERIC_VALUE_HANDLER(int32,  int32_t,  PRId32)
+NUMERIC_VALUE_HANDLER(int64,  int64_t,  PRId64)
+NUMERIC_VALUE_HANDLER(float,  float,    "g")
+NUMERIC_VALUE_HANDLER(double, double,   "g")

-upb_flow_t value_bool(void *closure, upb_value fval, upb_value val) {
+bool value_bool(void *closure, void *fval, bool val) {
  indent(closure);
-  output.appendf("%" PRIu32 ":%s\n",
-                 upb_value_getuint32(fval),
-                 upb_value_getbool(val) ? "true" : "false");
-  return UPB_CONTINUE;
+  uint32_t *num = static_cast<uint32_t*>(fval);
+  output.appendf("%" PRIu32 ":%s\n", *num, val ? "true" : "false");
+  return true;
 }

-upb_flow_t value_string(void *closure, upb_value fval, upb_value val) {
-  // Note: won't work with strings that contain NULL.
+void* startstr(void *closure, void *fval, size_t size_hint) {
  indent(closure);
-  char *str = upb_byteregion_strdup(upb_value_getbyteregion(val));
-  output.appendf("%" PRIu32 ":%s\n", upb_value_getuint32(fval), str);
-  free(str);
-  return UPB_CONTINUE;
+  uint32_t *num = static_cast<uint32_t*>(fval);
+  output.appendf("%" PRIu32 ":(%zu)\"", *num, size_hint);
+  return ((int*)closure) + 1;
+}
+
+// String-chunk handler: appends the n bytes at buf to the global output buffer
+// and returns n (presumably the number of bytes consumed).  The closure and
+// the bound field number are not needed here.
+size_t value_string(void *closure, void *fval, const char *buf, size_t n) {
+  UPB_UNUSED(closure);
+  UPB_UNUSED(fval);
+  output.append(buf, n);
+  return n;
+}
+
+bool endstr(void *closure, void *fval) {
+  UPB_UNUSED(fval);
+  output.append("\"\n");
+  return true;
 }

-upb_sflow_t startsubmsg(void *closure, upb_value fval) {
+void* startsubmsg(void *closure, void *fval) {
  indent(closure);
-  output.appendf("%" PRIu32 ":{\n", upb_value_getuint32(fval));
-  return UPB_CONTINUE_WITH(((int*)closure) + 1);
+  uint32_t *num = static_cast<uint32_t*>(fval);
+  output.appendf("%" PRIu32 ":{\n", *num);
+  return ((int*)closure) + 1;
 }

-upb_flow_t endsubmsg(void *closure, upb_value fval) {
+bool endsubmsg(void *closure, void *fval) {
+  UPB_UNUSED(fval);
  indent(closure);
  output.append("}\n");
-  return UPB_CONTINUE;
+  return true;
 }

-upb_sflow_t startseq(void *closure, upb_value fval) {
+void* startseq(void *closure, void *fval) {
  indent(closure);
-  output.appendf("%" PRIu32 ":[\n", upb_value_getuint32(fval));
-  return UPB_CONTINUE_WITH(((int*)closure) + 1);
+  uint32_t *num = static_cast<uint32_t*>(fval);
+  output.appendf("%" PRIu32 ":[\n", *num);
+  return ((int*)closure) + 1;
 }

-upb_flow_t endseq(void *closure, upb_value fval) {
+bool endseq(void *closure, void *fval) {
+  UPB_UNUSED(fval);
  indent(closure);
  output.append("]\n");
-  return UPB_CONTINUE;
+  return true;
 }

-upb_flow_t startmsg(void *closure) {
+bool startmsg(void *closure) {
  indent(closure);
  output.append("<\n");
-  return UPB_CONTINUE;
+  return true;
 }

 void endmsg(void *closure, upb_status *status) {
@ -254,14 +270,23 @@ void endmsg(void *closure, upb_status *status) {
  output.append(">\n");
 }

-void doreg(upb_mhandlers *m, uint32_t num, upb_fieldtype_t type, bool repeated,
-           upb_value_handler *handler) {
-  upb_fhandlers *f = upb_mhandlers_newfhandlers(m, num, type, repeated);
+// Cleanup callback for handler data: destroys a uint32_t that was allocated
+// with "new" and passed around as void*.
+void free_uint32(void *val) {
+  delete static_cast<uint32_t*>(val);
+}
+
+template<class T>
+void doreg(upb_handlers *h, uint32_t num,
+           typename upb::Handlers::Value<T>::Handler *handler) {
+  const upb_fielddef *f = upb_msgdef_itof(upb_handlers_msgdef(h), num);
  ASSERT(f);
-  upb_fhandlers_setvalue(f, handler);
-  upb_fhandlers_setstartseq(f, &startseq);
-  upb_fhandlers_setendseq(f, &endseq);
-  upb_fhandlers_setfval(f, upb_value_uint32(num));
+  ASSERT(h->SetValueHandler<T>(f, handler, new uint32_t(num), free_uint32));
+  if (f->IsSequence()) {
+    ASSERT(h->SetStartSequenceHandler(
+        f, &startseq, new uint32_t(num), free_uint32));
+    ASSERT(h->SetEndSequenceHandler(
+        f, &endseq, new uint32_t(num), free_uint32));
+  }
 }

 // The repeated field number to correspond to the given non-repeated field
@ -273,57 +298,81 @@ uint32_t rep_fn(uint32_t fn) {
 #define NOP_FIELD 40
 #define UNKNOWN_FIELD 666

-void reg(upb_mhandlers *m, upb_fieldtype_t type, upb_value_handler *handler) {
+template <class T>
+void reg(upb_handlers *h, upb_fieldtype_t type,
+         typename upb::Handlers::Value<T>::Handler *handler) {
  // We register both a repeated and a non-repeated field for every type.
  // For the non-repeated field we make the field number the same as the
  // type.  For the repeated field we make it a function of the type.
-  doreg(m, type, type, false, handler);
-  doreg(m, rep_fn(type), type, true, handler);
+  doreg<T>(h, type, handler);
+  doreg<T>(h, rep_fn(type), handler);
 }

-void reg_subm(upb_mhandlers *m, uint32_t num, upb_fieldtype_t type,
-              bool repeated) {
-  upb_fhandlers *f =
-      upb_mhandlers_newfhandlers_subm(m, num, type, repeated, m);
+void reg_subm(upb_handlers *h, uint32_t num) {
+  const upb_fielddef *f = upb_msgdef_itof(upb_handlers_msgdef(h), num);
  ASSERT(f);
-  upb_fhandlers_setstartseq(f, &startseq);
-  upb_fhandlers_setendseq(f, &endseq);
-  upb_fhandlers_setstartsubmsg(f, &startsubmsg);
-  upb_fhandlers_setendsubmsg(f, &endsubmsg);
-  upb_fhandlers_setfval(f, upb_value_uint32(num));
+  if (f->IsSequence()) {
+    ASSERT(h->SetStartSequenceHandler(
+        f, &startseq, new uint32_t(num), free_uint32));
+    ASSERT(h->SetEndSequenceHandler(
+        f, &endseq, new uint32_t(num), free_uint32));
+  }
+  ASSERT(h->SetStartSubMessageHandler(
+      f, &startsubmsg, new uint32_t(num), free_uint32));
+  ASSERT(h->SetEndSubMessageHandler(
+      f, &endsubmsg, new uint32_t(num), free_uint32));
+  ASSERT(upb_handlers_setsubhandlers(h, f, h));
 }

-void reghandlers(upb_mhandlers *m) {
-  upb_mhandlers_setstartmsg(m, &startmsg);
-  upb_mhandlers_setendmsg(m, &endmsg);
+// Registers the string handlers (start, chunk, end) for field number "num" of
+// the message that "h" handles, plus the sequence start/end handlers when the
+// field is a sequence.  Every handler gets its own heap-allocated copy of
+// "num" as bound data, released through free_uint32.
+// Note: the registration calls sit inside ASSERT(), so they depend on ASSERT
+// always evaluating its argument.
+void reg_str(upb_handlers *h, uint32_t num) {
+  const upb_fielddef *f = upb_msgdef_itof(upb_handlers_msgdef(h), num);
+  ASSERT(f);
+  if (f->IsSequence()) {
+    ASSERT(h->SetStartSequenceHandler(
+        f, &startseq, new uint32_t(num), free_uint32));
+    ASSERT(h->SetEndSequenceHandler(
+        f, &endseq, new uint32_t(num), free_uint32));
+  }
+  ASSERT(h->SetStartStringHandler(
+      f, &startstr, new uint32_t(num), free_uint32));
+  ASSERT(h->SetEndStringHandler(
+      f, &endstr, new uint32_t(num), free_uint32));
+  ASSERT(h->SetStringHandler(
+      f, &value_string, new uint32_t(num), free_uint32));
+}
+
+void reghandlers(upb_handlers *h) {
+  upb_handlers_setstartmsg(h, &startmsg);
+  upb_handlers_setendmsg(h, &endmsg);

  // Register handlers for each type.
-  reg(m, UPB_TYPE(DOUBLE),   &value_double);
-  reg(m, UPB_TYPE(FLOAT),    &value_float);
-  reg(m, UPB_TYPE(INT64),    &value_int64);
-  reg(m, UPB_TYPE(UINT64),   &value_uint64);
-  reg(m, UPB_TYPE(INT32) ,   &value_int32);
-  reg(m, UPB_TYPE(FIXED64),  &value_uint64);
-  reg(m, UPB_TYPE(FIXED32),  &value_uint32);
-  reg(m, UPB_TYPE(BOOL),     &value_bool);
-  reg(m, UPB_TYPE(STRING),   &value_string);
-  reg(m, UPB_TYPE(BYTES),    &value_string);
-  reg(m, UPB_TYPE(UINT32),   &value_uint32);
-  reg(m, UPB_TYPE(ENUM),     &value_int32);
-  reg(m, UPB_TYPE(SFIXED32), &value_int32);
-  reg(m, UPB_TYPE(SFIXED64), &value_int64);
-  reg(m, UPB_TYPE(SINT32),   &value_int32);
-  reg(m, UPB_TYPE(SINT64),   &value_int64);
+  reg<double>  (h, UPB_TYPE(DOUBLE),   &value_double);
+  reg<float>   (h, UPB_TYPE(FLOAT),    &value_float);
+  reg<int64_t> (h, UPB_TYPE(INT64),    &value_int64);
+  reg<uint64_t>(h, UPB_TYPE(UINT64),   &value_uint64);
+  reg<int32_t> (h, UPB_TYPE(INT32) ,   &value_int32);
+  reg<uint64_t>(h, UPB_TYPE(FIXED64),  &value_uint64);
+  reg<uint32_t>(h, UPB_TYPE(FIXED32),  &value_uint32);
+  reg<bool>    (h, UPB_TYPE(BOOL),     &value_bool);
+  reg<uint32_t>(h, UPB_TYPE(UINT32),   &value_uint32);
+  reg<int32_t> (h, UPB_TYPE(ENUM),     &value_int32);
+  reg<int32_t> (h, UPB_TYPE(SFIXED32), &value_int32);
+  reg<int64_t> (h, UPB_TYPE(SFIXED64), &value_int64);
+  reg<int32_t> (h, UPB_TYPE(SINT32),   &value_int32);
+  reg<int64_t> (h, UPB_TYPE(SINT64),   &value_int64);
+
+  reg_str(h, UPB_TYPE(STRING));
+  reg_str(h, UPB_TYPE(BYTES));
+  reg_str(h, rep_fn(UPB_TYPE(STRING)));
+  reg_str(h, rep_fn(UPB_TYPE(BYTES)));

  // Register submessage/group handlers that are self-recursive
  // to this type, eg: message M { optional M m = 1; }
-  reg_subm(m, UPB_TYPE(MESSAGE),         UPB_TYPE(MESSAGE), false);
-  reg_subm(m, UPB_TYPE(GROUP),           UPB_TYPE(GROUP),   false);
-  reg_subm(m, rep_fn(UPB_TYPE(MESSAGE)), UPB_TYPE(MESSAGE), true);
-  reg_subm(m, rep_fn(UPB_TYPE(GROUP)),   UPB_TYPE(GROUP),   true);
+  reg_subm(h, UPB_TYPE(MESSAGE));
+  reg_subm(h, rep_fn(UPB_TYPE(MESSAGE)));

-  // Register a no-op string field so we can pad the proto wherever we want.
-  upb_mhandlers_newfhandlers(m, NOP_FIELD, UPB_TYPE(STRING), false);
+  // For NOP_FIELD we register no handlers, so we can pad a proto freely without
+  // changing the output.
 }


@ -413,22 +462,32 @@ upb_byteregion *upb_seamsrc_allbytes(upb_seamsrc *s) {
 /* Running of test cases ******************************************************/

 upb_decoderplan *plan;
+
+// Computes the hash that identifies one decoder test run: MurmurHash2 over the
+// input proto, chained through the expected output (when one is given) and
+// then through one byte of the flag telling whether the current global plan
+// has JIT code.  run_decoder() compares this value against filter_hash.
+uint32_t Hash(const buffer& proto, const buffer* expected_output) {
+  uint32_t hash = MurmurHash2(proto.buf(), proto.len(), 0);
+  if (expected_output)
+    hash = MurmurHash2(expected_output->buf(), expected_output->len(), hash);
+  bool hasjit = upb_decoderplan_hasjitcode(plan);
+  hash = MurmurHash2(&hasjit, 1, hash);
+  return hash;
+}
+
 #define LINE(x) x "\n"
 void run_decoder(const buffer& proto, const buffer* expected_output) {
+  testhash = Hash(proto, expected_output);
+  if (filter_hash && testhash != filter_hash) return;
  upb_seamsrc src;
  upb_seamsrc_init(&src, proto.buf(), proto.len());
  upb_decoder d;
  upb_decoder_init(&d);
-  upb_decoder_resetplan(&d, plan, 0);
+  upb_decoder_resetplan(&d, plan);
  for (size_t i = 0; i < proto.len(); i++) {
    for (size_t j = i; j < UPB_MIN(proto.len(), i + 5); j++) {
      upb_seamsrc_resetseams(&src, i, j);
      upb_byteregion *input = upb_seamsrc_allbytes(&src);
      output.clear();
      upb_decoder_resetinput(&d, input, &closures[0]);
-      upb_success_t success = UPB_SUSPENDED;
-      while (success == UPB_SUSPENDED)
-        success = upb_decoder_decode(&d);
+      upb_success_t success = upb_decoder_decode(&d);
      ASSERT(upb_ok(upb_decoder_status(&d)) == (success == UPB_OK));
      if (expected_output) {
        ASSERT_STATUS(success == UPB_OK, upb_decoder_status(&d));
@ -448,6 +507,7 @@ void run_decoder(const buffer& proto, const buffer* expected_output) {
  }
  upb_decoder_uninit(&d);
  upb_seamsrc_uninit(&src);
+  testhash = 0;
 }

 const static buffer thirty_byte_nop = buffer(cat(
@ -777,35 +837,47 @@ void run_tests() {
  test_valid();
 }

-int main() {
+extern "C" {
+
+// Test entry point.  argv[1], when present, is a hexadecimal test hash that is
+// stored in filter_hash; run_decoder() then skips every run whose hash differs.
+// Builds a plan for an empty message first, then runs the whole suite against
+// the DecoderTest handlers without JIT and, when available, with JIT.
+int run_tests(int argc, char *argv[]) {
+  if (argc > 1) {
+    // strtoul rather than strtol: hashes use all 32 bits, and strtol saturates
+    // at LONG_MAX for inputs >= 0x80000000 where long is 32 bits wide.
+    filter_hash = strtoul(argv[1], NULL, 16);
+  }
  for (int i = 0; i < UPB_MAX_NESTING; i++) {
    closures[i] = i;
  }
-  // Construct decoder plan.
-  upb_handlers *h = upb_handlers_new();
-  reghandlers(upb_handlers_newmhandlers(h));

  // Create an empty handlers to make sure that the decoder can handle empty
  // messages.
-  upb_handlers_newmhandlers(h);
+  upb_handlers *h = upb_handlers_new(UPB_TEST_DECODER_EMPTYMESSAGE, &h);
+  bool ok = upb_handlers_freeze(&h, 1, NULL);
+  ASSERT(ok);
+  plan = upb_decoderplan_new(h, true);
+  upb_handlers_unref(h, &h);
+  upb_decoderplan_unref(plan);
+
+  // Construct decoder plan.
+  h = upb_handlers_new(UPB_TEST_DECODER_DECODERTEST, &h);
+  reghandlers(h);
+  ok = upb_handlers_freeze(&h, 1, NULL);
+  ASSERT(ok);  // Do not build plans from handlers that failed to freeze.

  // Test without JIT.
  plan = upb_decoderplan_new(h, false);
+  ASSERT(!upb_decoderplan_hasjitcode(plan));
  run_tests();
  upb_decoderplan_unref(plan);

+#ifdef UPB_USE_JIT_X64
  // Test JIT.
  plan = upb_decoderplan_new(h, true);
-#ifdef UPB_USE_JIT_X64
  ASSERT(upb_decoderplan_hasjitcode(plan));
-#else
-  ASSERT(!upb_decoderplan_hasjitcode(plan));
-#endif
  run_tests();
  upb_decoderplan_unref(plan);
+#endif

  plan = NULL;
  printf("All tests passed, %d assertions.\n", num_assertions);
-  upb_handlers_unref(h);
+  upb_handlers_unref(h, &h);
  return 0;
 }
+
+}
--- a/tests/test_decoder_schema.proto
+++ b/tests/test_decoder_schema.proto
@ -0,0 +1,64 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2012 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+//
+// Schema used in test_decoder.cc.  It contains two fields (one optional
+// and one repeated) for each type.
+
+package upb.test_decoder;
+
+// Self-referential message: its only field is an optional submessage of its
+// own type.
+message M {
+  optional M m = 1;
+}
+
+// Enum with a single value; it is the type of DecoderTest's f_enum and r_enum
+// fields.
+enum E {
+  FOO = 1;
+}
+
+// A message that declares no fields.
+message EmptyMessage {}
+
+// Main test message.  For every scalar type, plus a message type and an enum
+// type, it declares one optional field (f_*) and one repeated field (r_*).
+// The optional fields use small field numbers (1-18); the repeated fields use
+// very large ones (536869912 and up).
+message DecoderTest {
+  optional double   f_double    = 1;
+  optional float    f_float     = 2;
+  optional int64    f_int64     = 3;
+  optional uint64   f_uint64    = 4;
+  optional int32    f_int32     = 5;
+  optional fixed64  f_fixed64   = 6;
+  optional fixed32  f_fixed32   = 7;
+  optional bool     f_bool      = 8;
+  optional string   f_string    = 9;
+  optional bytes    f_bytes     = 12;
+  optional uint32   f_uint32    = 13;
+  optional sfixed32 f_sfixed32  = 15;
+  optional sfixed64 f_sfixed64  = 16;
+  optional sint32   f_sint32    = 17;
+  optional sint64   f_sint64    = 18;
+
+  // Recursive submessage and enum-typed fields.
+  optional DecoderTest f_message  = 11;
+  optional E f_enum     = 14;
+
+
+  repeated double   r_double    = 536869912;
+  repeated float    r_float     = 536869913;
+  repeated int64    r_int64     = 536869914;
+  repeated uint64   r_uint64    = 536869915;
+  repeated int32    r_int32     = 536869916;
+  repeated fixed64  r_fixed64   = 536869917;
+  repeated fixed32  r_fixed32   = 536869918;
+  repeated bool     r_bool      = 536869919;
+  repeated string   r_string    = 536869920;
+  repeated bytes    r_bytes     = 536869923;
+  repeated uint32   r_uint32    = 536869924;
+  repeated sfixed32 r_sfixed32  = 536869926;
+  repeated sfixed64 r_sfixed64  = 536869927;
+  repeated sint32   r_sint32    = 536869928;
+  repeated sint64   r_sint64    = 536869929;
+
+  // Repeated counterparts of f_message and f_enum.
+  repeated DecoderTest r_message  = 536869922;
+  repeated E r_enum     = 536869925;
+
+  // To allow arbitrary padding.
+  optional string nop_field = 40;
+}
--- a/tests/test_def.c
+++ b/tests/test_def.c
@ -18,7 +18,7 @@ const char *descriptor_file;
 static void test_empty_symtab() {
  upb_symtab *s = upb_symtab_new(&s);
  int count;
-  const upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY, NULL);
+  const upb_def **defs = upb_symtab_getdefs(s, UPB_DEF_ANY, NULL, &count);
  ASSERT(count == 0);
  free(defs);
  upb_symtab_unref(s, &s);
@ -31,7 +31,7 @@ static upb_symtab *load_test_proto(void *owner) {
  if (!upb_load_descriptor_file_into_symtab(s, descriptor_file, &status)) {
    fprintf(stderr, "Error loading descriptor file: %s\n",
            upb_status_getstr(&status));
-    exit(1);
+    ASSERT(false);
  }
  upb_status_uninit(&status);
  return s;
@ -44,27 +44,30 @@ static void test_cycles() {
  // and then be incremented to one again.
  const upb_def *def = upb_symtab_lookup(s, "A", &def);
  ASSERT(def);
-  ASSERT(upb_def_isfinalized(def));
+  ASSERT(upb_def_isfrozen(def));
  upb_symtab_unref(s, &s);

  // Message A has only one subfield: "optional B b = 1".
-  const upb_msgdef *m = upb_downcast_msgdef_const(def);
-  upb_fielddef *f = upb_msgdef_itof(m, 1);
+  const upb_msgdef *m = upb_downcast_msgdef(def);
+  const upb_fielddef *f = upb_msgdef_itof(m, 1);
  ASSERT(f);
-  ASSERT(upb_hassubdef(f));
+  ASSERT(upb_fielddef_hassubdef(f));
  const upb_def *def2 = upb_fielddef_subdef(f);
-  ASSERT(upb_downcast_msgdef_const(def2));
+  ASSERT(upb_downcast_msgdef(def2));
  ASSERT(strcmp(upb_def_fullname(def2), "B") == 0);

  upb_def_ref(def2, &def2);
  upb_def_unref(def, &def);
+
+  // We know "def" is still alive because it's reachable from def2.
+  ASSERT(strcmp(upb_def_fullname(def), "A") == 0);
  upb_def_unref(def2, &def2);
 }

 static void test_fielddef_unref() {
  upb_symtab *s = load_test_proto(&s);
  const upb_msgdef *md = upb_symtab_lookupmsg(s, "A", &md);
-  upb_fielddef *f = upb_msgdef_itof(md, 1);
+  const upb_fielddef *f = upb_msgdef_itof(md, 1);
  upb_fielddef_ref(f, &f);

  // Unref symtab and msgdef; now fielddef is the only thing keeping the msgdef
@ -72,7 +75,7 @@ static void test_fielddef_unref() {
  upb_symtab_unref(s, &s);
  upb_msgdef_unref(md, &md);
  // Check that md is still alive.
-  ASSERT(strcmp(upb_def_fullname(UPB_UPCAST(md)), "A") == 0);
+  ASSERT(strcmp(upb_def_fullname(upb_upcast(md)), "A") == 0);

  // Check that unref of fielddef frees the whole remaining graph.
  upb_fielddef_unref(f, &f);
@ -82,14 +85,14 @@ static void test_fielddef_accessors() {
  upb_fielddef *f1 = upb_fielddef_new(&f1);
  upb_fielddef *f2 = upb_fielddef_new(&f2);

-  ASSERT(upb_fielddef_ismutable(f1));
+  ASSERT(!upb_fielddef_isfrozen(f1));
  upb_fielddef_setname(f1, "f1");
  upb_fielddef_setnumber(f1, 1937);
  upb_fielddef_settype(f1, UPB_TYPE(FIXED64));
  upb_fielddef_setlabel(f1, UPB_LABEL(REPEATED));
  ASSERT(upb_fielddef_number(f1) == 1937);

-  ASSERT(upb_fielddef_ismutable(f2));
+  ASSERT(!upb_fielddef_isfrozen(f2));
  upb_fielddef_setname(f2, "f2");
  upb_fielddef_setnumber(f2, 1572);
  upb_fielddef_settype(f2, UPB_TYPE(BYTES));
@ -98,6 +101,12 @@ static void test_fielddef_accessors() {

  upb_fielddef_unref(f1, &f1);
  upb_fielddef_unref(f2, &f2);
+
+  // Test that we don't leak an unresolved subdef name.
+  f1 = upb_fielddef_new(&f1);
+  upb_fielddef_settype(f1, UPB_TYPE(MESSAGE));
+  upb_fielddef_setsubdefname(f1, "YO");
+  upb_fielddef_unref(f1, &f1);
 }

 static upb_fielddef *newfield(
@ -108,23 +117,23 @@ static upb_fielddef *newfield(
  upb_fielddef_setnumber(f, num);
  upb_fielddef_settype(f, type);
  upb_fielddef_setlabel(f, label);
-  upb_fielddef_setsubtypename(f, type_name);
+  upb_fielddef_setsubdefname(f, type_name);
  return f;
 }

 static upb_msgdef *upb_msgdef_newnamed(const char *name, void *owner) {
  upb_msgdef *m = upb_msgdef_new(owner);
-  upb_def_setfullname(UPB_UPCAST(m), name);
+  upb_def_setfullname(upb_upcast(m), name);
  return m;
 }

 INLINE upb_enumdef *upb_enumdef_newnamed(const char *name, void *owner) {
  upb_enumdef *e = upb_enumdef_new(owner);
-  upb_def_setfullname(UPB_UPCAST(e), name);
+  upb_def_setfullname(upb_upcast(e), name);
  return e;
 }

-void test_replacement() {
+static void test_replacement() {
  upb_symtab *s = upb_symtab_new(&s);

  upb_msgdef *m = upb_msgdef_newnamed("MyMessage", &s);
@ -133,15 +142,15 @@ void test_replacement() {
  upb_msgdef *m2 = upb_msgdef_newnamed("MyMessage2", &s);
  upb_enumdef *e = upb_enumdef_newnamed("MyEnum", &s);

-  upb_def *newdefs[] = {UPB_UPCAST(m), UPB_UPCAST(m2), UPB_UPCAST(e)};
+  upb_def *newdefs[] = {upb_upcast(m), upb_upcast(m2), upb_upcast(e)};
  upb_status status = UPB_STATUS_INIT;
  ASSERT_STATUS(upb_symtab_add(s, newdefs, 3, &s, &status), &status);

  // Try adding a new definition of MyEnum, MyMessage should get replaced with
  // a new version.
  upb_enumdef *e2 = upb_enumdef_new(&s);
-  upb_def_setfullname(UPB_UPCAST(e2), "MyEnum");
-  upb_def *newdefs2[] = {UPB_UPCAST(e2)};
+  upb_def_setfullname(upb_upcast(e2), "MyEnum");
+  upb_def *newdefs2[] = {upb_upcast(e2)};
  ASSERT_STATUS(upb_symtab_add(s, newdefs2, 1, &s, &status), &status);

  const upb_msgdef *m3 = upb_symtab_lookupmsg(s, "MyMessage", &m3);
@ -159,7 +168,95 @@ void test_replacement() {
  upb_symtab_unref(s, &s);
 }

-int main(int argc, char *argv[]) {
+static void test_freeze_free() {
+  // Test that freeze frees defs that were only being kept alive by virtue of
+  // sharing a group with other defs that are being frozen.
+  upb_msgdef *m1 = upb_msgdef_newnamed("M1", &m1);
+  upb_msgdef *m2 = upb_msgdef_newnamed("M2", &m2);
+  upb_msgdef *m3 = upb_msgdef_newnamed("M3", &m3);
+  upb_msgdef *m4 = upb_msgdef_newnamed("M4", &m4);
+
+  // Freeze M4 and make M1 point to it.  The freeze must succeed for the rest
+  // of the test to be meaningful, so check its result like the later freeze.
+  ASSERT(upb_def_freeze((upb_def*const*)&m4, 1, NULL));
+
+  upb_fielddef *f = upb_fielddef_new(&f);
+  upb_fielddef_settype(f, UPB_TYPE_MESSAGE);
+  ASSERT(upb_fielddef_setnumber(f, 1));
+  ASSERT(upb_fielddef_setname(f, "foo"));
+  ASSERT(upb_fielddef_setsubdef(f, upb_upcast(m4)));
+
+  ASSERT(upb_msgdef_addfield(m1, f, &f));
+
+  // After this unref, M1 is the only thing keeping M4 alive.
+  upb_msgdef_unref(m4, &m4);
+
+  // Force M1/M2/M3 into a single mutable refcounting group.
+  f = upb_fielddef_new(&f);
+  upb_fielddef_settype(f, UPB_TYPE_MESSAGE);
+  ASSERT(upb_fielddef_setnumber(f, 1));
+  ASSERT(upb_fielddef_setname(f, "foo"));
+
+  // The same field is pointed at each def in turn; M3 is the subdef it ends
+  // up with.  NOTE(review): presumably each call merges the groups -- confirm.
+  ASSERT(upb_fielddef_setsubdef(f, upb_upcast(m1)));
+  ASSERT(upb_fielddef_setsubdef(f, upb_upcast(m2)));
+  ASSERT(upb_fielddef_setsubdef(f, upb_upcast(m3)));
+
+  // Make M3 cyclic with itself.
+  ASSERT(upb_msgdef_addfield(m3, f, &f));
+
+  // These will be kept alive since they are in the same refcounting group as
+  // M3, which still has a ref.  Note: this behavior is not guaranteed by the
+  // API, but true in practice with its current implementation.
+  upb_msgdef_unref(m1, &m1);
+  upb_msgdef_unref(m2, &m2);
+
+  // Test that they are still alive (NOT allowed by the API).
+  ASSERT(strcmp("M1", upb_def_fullname(upb_upcast(m1))) == 0);
+  ASSERT(strcmp("M2", upb_def_fullname(upb_upcast(m2))) == 0);
+
+  // Freeze M3.  If the test leaked no memory, then freeing m1 and m2 was
+  // successful.
+  ASSERT(upb_def_freeze((upb_def*const*)&m3, 1, NULL));
+
+  upb_msgdef_unref(m3, &m3);
+}
+
+static void test_partial_freeze() {
+  // Test that freeze of only part of the graph correctly adjusts objects that
+  // point to the newly-frozen objects.
+  upb_msgdef *m1 = upb_msgdef_newnamed("M1", &m1);
+  upb_msgdef *m2 = upb_msgdef_newnamed("M2", &m2);
+  upb_msgdef *m3 = upb_msgdef_newnamed("M3", &m3);
+
+  // f1 is a message-typed field whose subdef is m1.
+  upb_fielddef *f1 = upb_fielddef_new(&f1);
+  upb_fielddef_settype(f1, UPB_TYPE_MESSAGE);
+  ASSERT(upb_fielddef_setnumber(f1, 1));
+  ASSERT(upb_fielddef_setname(f1, "f1"));
+  ASSERT(upb_fielddef_setsubdef(f1, upb_upcast(m1)));
+
+  // f2 is a message-typed field whose subdef is m2.
+  upb_fielddef *f2 = upb_fielddef_new(&f2);
+  upb_fielddef_settype(f2, UPB_TYPE_MESSAGE);
+  ASSERT(upb_fielddef_setnumber(f2, 2));
+  ASSERT(upb_fielddef_setname(f2, "f2"));
+  ASSERT(upb_fielddef_setsubdef(f2, upb_upcast(m2)));
+
+  // Both fields are added to m3, so m3 refers to m1 and m2 through them.
+  ASSERT(upb_msgdef_addfield(m3, f1, &f1));
+  ASSERT(upb_msgdef_addfield(m3, f2, &f2));
+
+  // Freeze M1 and M2, which should cause the group to be split
+  // and m3 (left mutable) to take references on m1 and m2.
+  upb_def *defs[] = {upb_upcast(m1), upb_upcast(m2)};
+  ASSERT(upb_def_freeze(defs, 2, NULL));
+
+  ASSERT(upb_msgdef_isfrozen(m1));
+  ASSERT(upb_msgdef_isfrozen(m2));
+  ASSERT(!upb_msgdef_isfrozen(m3));
+
+  // Release the refs taken when the three msgdefs were created above.
+  upb_msgdef_unref(m1, &m1);
+  upb_msgdef_unref(m2, &m2);
+  upb_msgdef_unref(m3, &m3);
+}
+
+int run_tests(int argc, char *argv[]) {
  if (argc < 2) {
    fprintf(stderr, "Usage: test_def <test.proto.pb>\n");
    return 1;
@ -170,5 +267,7 @@ int main(int argc, char *argv[]) {
  test_fielddef_accessors();
  test_fielddef_unref();
  test_replacement();
+  test_freeze_free();
+  test_partial_freeze();
  return 0;
 }
--- a/tests/test_table.cc
+++ b/tests/test_table.cc
@ -34,7 +34,7 @@ void test_strtable(const vector<std::string>& keys, uint32_t num_to_insert) {
  /* Initialize structures. */
  upb_strtable table;
  std::map<std::string, int32_t> m;
-  upb_strtable_init(&table);
+  upb_strtable_init(&table, UPB_CTYPE_INT32);
  std::set<std::string> all;
  for(size_t i = 0; i < num_to_insert; i++) {
    const std::string& key = keys[i];
@ -77,7 +77,7 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) {
  uint32_t largest_key = 0;
  std::map<uint32_t, uint32_t> m;
  __gnu_cxx::hash_map<uint32_t, uint32_t> hm;
-  upb_inttable_init(&table);
+  upb_inttable_init(&table, UPB_CTYPE_UINT32);
  for(size_t i = 0; i < num_entries; i++) {
    int32_t key = keys[i];
    largest_key = UPB_MAX((int32_t)largest_key, key);
@ -103,7 +103,7 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) {
    upb_value val;
    bool ret = upb_inttable_remove(&table, keys[i], &val);
    ASSERT(ret == (m.erase(keys[i]) == 1));
-    if (ret) ASSERT(upb_value_getuint32(val) == keys[i] * 2);
+    if (ret) ASSERT(upb_value_getuint32(val) == (uint32_t)keys[i] * 2);
    hm.erase(keys[i]);
    m.erase(keys[i]);
  }
@ -244,7 +244,9 @@ int32_t *get_contiguous_keys(int32_t num) {
  return buf;
 }

-int main(int argc, char *argv[]) {
+extern "C" {
+
+int run_tests(int argc, char *argv[]) {
  for (int i = 1; i < argc; i++) {
    if (strcmp(argv[i], "--benchmark") == 0) benchmark = true;
  }
@ -292,4 +294,7 @@ int main(int argc, char *argv[]) {
  }
  test_inttable(keys4, 64, "Table size: 64, keys: 1-32 and 10133-10164 ====\n");
  delete[] keys4;
+  return 0;
+}
+
 }
--- a/tests/test_varint.c
+++ b/tests/test_varint.c
@ -86,16 +86,19 @@ static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) {
    printf("ok.\n"); \
  } \

-TEST_VARINT_DECODER(branch32);
-TEST_VARINT_DECODER(branch64);
+TEST_VARINT_DECODER(check2_branch32);
+TEST_VARINT_DECODER(check2_branch64);
 TEST_VARINT_DECODER(check2_wright);
 TEST_VARINT_DECODER(check2_massimino);

-int main() {
-  test_branch32();
-  test_branch64();
+int run_tests(int argc, char *argv[]) {
+  UPB_UNUSED(argc);
+  UPB_UNUSED(argv);
+  test_check2_branch32();
+  test_check2_branch64();
  test_check2_wright();
  test_check2_massimino();
+  return 0;
 }

 #if 0
--- a/tests/test_vs_proto2.cc
+++ b/tests/test_vs_proto2.cc
@ -10,19 +10,19 @@
 #define __STDC_LIMIT_MACROS  // So we get UINT32_MAX
 #include <assert.h>
 #include <google/protobuf/descriptor.h>
+#include <google/protobuf/message.h>
 #include <google/protobuf/wire_format_lite.h>
 #include <inttypes.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "benchmarks/google_messages.pb.h"
-#include "upb/def.hpp"
-#include "upb/handlers.hpp"
-#include "upb/msg.hpp"
-#include "upb/pb/decoder.hpp"
+#include "bindings/cpp/upb/pb/decoder.hpp"
+#include "upb/def.h"
+#include "upb/google/bridge.h"
+#include "upb/handlers.h"
 #include "upb/pb/glue.h"
 #include "upb/pb/varint.h"
-#include "upb/proto2_bridge.hpp"
 #include "upb_test.h"

 void compare_metadata(const google::protobuf::Descriptor* d,
@ -36,28 +36,25 @@ void compare_metadata(const google::protobuf::Descriptor* d,
    ASSERT(proto2_f);
    ASSERT(upb_f->number() == proto2_f->number());
    ASSERT(std::string(upb_f->name()) == proto2_f->name());
-    ASSERT(upb_f->type() == static_cast<upb::FieldType>(proto2_f->type()));
+    ASSERT(upb_f->type() == static_cast<upb::FieldDef::Type>(proto2_f->type()));
    ASSERT(upb_f->IsSequence() == proto2_f->is_repeated());
  }
 }

 void parse_and_compare(MESSAGE_CIDENT *msg1, MESSAGE_CIDENT *msg2,
-                       const upb::MessageDef *upb_md,
+                       const upb::Handlers *handlers,
                       const char *str, size_t len, bool allow_jit) {
  // Parse to both proto2 and upb.
  ASSERT(msg1->ParseFromArray(str, len));

-  upb::Handlers* handlers = upb::Handlers::New();
-  upb::RegisterWriteHandlers(handlers, upb_md);
  upb::DecoderPlan* plan = upb::DecoderPlan::New(handlers, allow_jit);
  upb::StringSource src(str, len);
  upb::Decoder decoder;
-  decoder.ResetPlan(plan, 0);
+  decoder.ResetPlan(plan);
  decoder.ResetInput(src.AllBytes(), msg2);
  msg2->Clear();
  ASSERT(decoder.Decode() == UPB_OK);
  plan->Unref();
-  handlers->Unref();

  // Would like to just compare the message objects themselves,  but
  // unfortunately MessageDifferencer is not part of the open-source release of
@ -83,7 +80,9 @@ void test_zig_zag() {

 }

-int main(int argc, char *argv[])
+extern "C" {
+
+int run_tests(int argc, char *argv[])
 {
  if (argc < 2) {
    fprintf(stderr, "Usage: test_vs_proto2 <message file>\n");
@ -102,18 +101,18 @@ int main(int argc, char *argv[])
  MESSAGE_CIDENT msg1;
  MESSAGE_CIDENT msg2;

-  const upb::MessageDef* m = upb::proto2_bridge::NewFinalMessageDef(msg1, &m);
+  const upb::Handlers* h = upb::google::NewWriteHandlers(msg1, &h);

-  compare_metadata(msg1.GetDescriptor(), m);
+  compare_metadata(msg1.GetDescriptor(), h->message_def());

  // Run twice to test proper object reuse.
-  parse_and_compare(&msg1, &msg2, m, str, len, true);
-  parse_and_compare(&msg1, &msg2, m, str, len, false);
-  parse_and_compare(&msg1, &msg2, m, str, len, true);
-  parse_and_compare(&msg1, &msg2, m, str, len, false);
+  parse_and_compare(&msg1, &msg2, h, str, len, false);
+  parse_and_compare(&msg1, &msg2, h, str, len, true);
+  parse_and_compare(&msg1, &msg2, h, str, len, false);
+  parse_and_compare(&msg1, &msg2, h, str, len, true);
  printf("All tests passed, %d assertions.\n", num_assertions);

-  m->Unref(&m);
+  h->Unref(&h);
  free((void*)str);

  test_zig_zag();
@ -121,3 +120,5 @@ int main(int argc, char *argv[])
  google::protobuf::ShutdownProtobufLibrary();
  return 0;
 }
+
+}
--- a/tests/testmain.cc
+++ b/tests/testmain.cc
@ -0,0 +1,18 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+// Author: haberman@google.com (Josh Haberman)
+
+#include <stdlib.h>
+#ifdef USE_GOOGLE
+#include "base/init_google.h"
+#endif
+
+extern "C" {
+int run_tests(int argc, char *argv[]);
+}
+
+// Shared entry point for the test binaries: optionally initializes the Google
+// environment, then hands control to the run_tests() supplied by the test
+// file linked into this binary.  The value returned by run_tests() becomes
+// the process exit status, so a non-zero result is visible to the caller.
+int main(int argc, char *argv[]) {
+#ifdef USE_GOOGLE
+  InitGoogle(NULL, &argc, &argv, true);
+#endif
+  return run_tests(argc, argv);
+}
--- a/tests/upb_test.h
+++ b/tests/upb_test.h
@ -9,25 +9,35 @@

 #include <stdio.h>
 #include <stdlib.h>
+#include <stdint.h>

 #ifdef __cplusplus
 extern "C" {
 #endif

 int num_assertions = 0;
+uint32_t testhash = 0;
+
+// Prints the location and text of a failed assertion to stderr and, when
+// testhash is non-zero, the hash of the running test together with a hint on
+// how to re-run only that test.  Expands to several statements that are not
+// wrapped in do/while, so it must only be used inside a braced block.
+#define PRINT_FAILURE(expr) \
+  fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \
+  fprintf(stderr, "expr: %s\n", #expr); \
+  if (testhash) { \
+    fprintf(stderr, "assertion failed running test %x.  " \
+                    "Run with the arg %x to run only this test.\n", \
+                    testhash, testhash); \
+  }
+
 #define ASSERT(expr) do { \
  ++num_assertions; \
  if (!(expr)) { \
-    fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \
-    fprintf(stderr, "expr: %s\n", #expr); \
+    PRINT_FAILURE(expr) \
    abort(); \
  } \
 } while (0)

 #define ASSERT_NOCOUNT(expr) do { \
  if (!(expr)) { \
-    fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \
-    fprintf(stderr, "expr: %s\n", #expr); \
+    PRINT_FAILURE(expr) \
    abort(); \
  } \
 } while (0)
@ -35,8 +45,7 @@ int num_assertions = 0;
 #define ASSERT_STATUS(expr, status) do { \
  ++num_assertions; \
  if (!(expr)) { \
-    fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \
-    fprintf(stderr, "expr: %s\n", #expr); \
+    PRINT_FAILURE(expr) \
    fprintf(stderr, "failed status: %s\n", upb_status_getstr(status)); \
    abort(); \
  } \
--- a/tools/dump_cinit.lua
+++ b/tools/dump_cinit.lua
@ -0,0 +1,414 @@
+--[[
+
+  upb - a minimalist implementation of protocol buffers.
+
+  Copyright (c) 2012 Google Inc.  See LICENSE for details.
+  Author: Josh Haberman <jhaberman@gmail.com>
+
+  Routines for dumping internal data structures into C initializers
+  that can be compiled into a .o file.
+
+--]]
+
+local upbtable = require "upbtable"
+local upb = require "upb"
+local export = {}
+
+-- A tiny little abstraction that decouples the dump_* functions from
+-- what they're writing to (appending to a string, writing to file I/O, etc).
+--
+-- str_appender() returns two closures: append(fmt, ...) formats its arguments
+-- with string.format and buffers the result; get() returns everything that
+-- has been appended so far as one string.  The pieces are collected in a
+-- table and joined on demand, because naive string building is O(n^2) in the
+-- number of appends.
+function export.str_appender()
+  local pieces = {}
+  local function append(fmt, ...)
+    pieces[#pieces + 1] = string.format(fmt, ...)
+  end
+  local function get()
+    return table.concat(pieces)
+  end
+  return append, get
+end
+
+-- Returns an append(fmt, ...) closure that formats its arguments with
+-- string.format and writes the resulting text to the given file handle.
+function export.file_appender(file)
+  return function(fmt, ...)
+    local text = string.format(fmt, ...)
+    file:write(text)
+  end
+end
+
+-- Maps the value of an accessor back to the name of the matching C constant:
+--   const(f, "label") -> "UPB_LABEL_REPEATED", where
+--   f:label() == upb.LABEL_REPEATED
+-- Only members of the upb module whose name starts with the upper-cased
+-- accessor name are considered.  Fails an assertion if nothing matches.
+function const(obj, name)
+  local wanted = obj[name](obj)
+  for cname, cval in pairs(upb) do
+    if cval == wanted then
+      if string.find(cname, "^" .. string.upper(name)) then
+        return "UPB_" .. cname
+      end
+    end
+  end
+  assert(false, "Couldn't find constant")
+end
+
+--[[
+
+  LinkTable: an object that tracks all linkable objects and their offsets to
+  facilitate linking.
+
+--]]
+
+local LinkTable = {}
+
+-- Creates a link table.  "basename" prefixes every generated C symbol and
+-- "types" maps each object type to the suffix used for that type's array.
+function LinkTable:new(basename, types)
+  -- One (initially empty) array per object type; it establishes the ordering
+  -- of the objects of that type.
+  local obj_arrays = {}
+  for objtype in pairs(types) do
+    obj_arrays[objtype] = {}
+  end
+  local linktab = {
+    basename = basename,
+    types = types,
+    table = {},  -- ptr -> {type, 0-based offset}
+    obj_arrays = obj_arrays,
+  }
+  -- Method lookups on the instance fall back to LinkTable.
+  return setmetatable(linktab, {__index = LinkTable})
+end
+
+-- Adds a new object to the sequence of objects of this type.
+--
+--   objtype: a key of the "types" map given to LinkTable:new().
+--   ptr:     the key under which the object is later found by addr().
+--   obj:     the value stored in the per-type array and yielded by objs();
+--            defaults to ptr when omitted.
+function LinkTable:add(objtype, ptr, obj)
+  obj = obj or ptr
+  -- The link table is keyed by ptr (not obj), so ptr is the key that has to
+  -- be checked in order to reject something that was already added.
+  assert(self.table[ptr] == nil)
+  assert(self.types[objtype])
+  local arr = self.obj_arrays[objtype]
+  self.table[ptr] = {objtype, #arr}
+  arr[#arr + 1] = obj
+end
+
+-- Builds the C expression "<basename>_<typestr>[<offset>]" naming element
+-- "offset" of the array that holds objects of the given type.  Fails an
+-- assertion if the type is unknown.
+function LinkTable:csym(objtype, offset)
+  local typestr = self.types[objtype]
+  assert(typestr)
+  local fmt = "%s_%s[%d]"
+  return fmt:format(self.basename, typestr, offset)
+end
+
+-- Returns a C expression for the address of a previously added object, or
+-- "NULL" for upbtable.NULL.  Fails an assertion for objects never added.
+function LinkTable:addr(obj)
+  if obj ~= upbtable.NULL then
+    local tabent = assert(self.table[obj], "unknown object")
+    local objtype, offset = tabent[1], tabent[2]
+    return "&" .. self:csym(objtype, offset)
+  end
+  return "NULL"
+end
+
+-- Returns the array declarator for the given type, i.e. the array's symbol
+-- subscripted with the number of objects added so far.
+function LinkTable:cdecl(objtype)
+  local count = #self.obj_arrays[objtype]
+  return self:csym(objtype, count)
+end
+
+-- Returns an iterator over the objects of the given type, in the order they
+-- were added.  Usage:
+--   for obj in linktable:objs(type) do
+--     -- ...
+--   end
+function LinkTable:objs(objtype)
+  local array = self.obj_arrays[objtype]
+  local pos = 0
+  return function()
+    pos = pos + 1
+    local item = array[pos]
+    if item then return item end
+  end
+end
+
+--[[
+
+  Dumper: an object that can dump C initializers for several constructs.
+  Uses a LinkTable to resolve references when necessary.
+
+--]]
+
+local Dumper = {}
+
+-- Creates a dumper that resolves object references through "linktab".
+function Dumper:new(linktab)
+  -- Method lookups on the new object fall back to Dumper.
+  return setmetatable({linktab = linktab}, {__index = Dumper})
+end
+
+-- Dumps a upb_value, eg:
+--   UPB_VALUE_INIT_INT32(5)
+--
+-- "val" is the Lua value to dump.  "upbtype" is only consulted for numbers,
+-- where it selects the initializer macro; any number type other than
+-- upbtable.CTYPE_INT32 raises an error.  nil yields UPB_VALUE_INIT_NONE,
+-- strings are emitted as quoted C string literals (without escaping), and
+-- everything else is resolved through the link table.
+function Dumper:value(val, upbtype)
+  if type(val) == "nil" then
+    return "UPB_VALUE_INIT_NONE"
+  elseif type(val) == "number" then
+    -- Use upbtype to disambiguate what kind of number it is.
+    if upbtype == upbtable.CTYPE_INT32 then
+      return string.format("UPB_VALUE_INIT_INT32(%d)", val)
+    else
+      -- TODO(haberman): add support for these so we can properly support
+      -- default values.
+      -- tostring() keeps this message intact even when upbtype is nil, which
+      -- plain concatenation would turn into an unrelated error.
+      error("Unsupported number type " .. tostring(upbtype))
+    end
+  elseif type(val) == "string" then
+    return string.format('UPB_VALUE_INIT_CONSTPTR("%s")', val)
+  else
+    -- We take this as an object reference that has an entry in the link table.
+    return string.format("UPB_VALUE_INIT_CONSTPTR(%s)", self.linktab:addr(val))
+  end
+end
+
+-- Dumps the initializer for a table key: nil becomes UPB_TABKEY_NONE, a
+-- string becomes UPB_TABKEY_STR("..."), anything else UPB_TABKEY_NUM(n).
+function Dumper:tabkey(key)
+  local kind = type(key)
+  if kind == "string" then
+    return string.format('UPB_TABKEY_STR("%s")', key)
+  elseif kind == "nil" then
+    return "UPB_TABKEY_NONE"
+  end
+  return string.format("UPB_TABKEY_NUM(%d)", key)
+end
+
+-- Dumps one hash table entry as an indented "{key, value, next}," line.
+function Dumper:tabent(ent)
+  local key = self:tabkey(ent.key)
+  local val = self:value(ent.value, ent.valtype)
+  local nxt = self.linktab:addr(ent.next)
+  return "  {" .. table.concat({key, val, nxt}, ", ") .. "},\n"
+end
+
+-- Dumps one entry of an inttable's array part as an indented line.  Works
+-- like value(), except that an entry without a value is written as the
+-- special "empty" marker UPB_ARRAY_EMPTYENT.
+function Dumper:arrayval(val)
+  if not val.val then
+    return "  UPB_ARRAY_EMPTYENT,\n"
+  end
+  return "  " .. self:value(val.val, val.valtype) .. ",\n"
+end
+
+-- Dumps an initializer for the given strtable/inttable (respectively).  Its
+-- entries must have previously been added to the linktable.
+function Dumper:strtable(t)
+  -- UPB_STRTABLE_INIT(count, mask, type, size_lg2, entries)
+  -- A table without entries gets a NULL entries pointer, matching what
+  -- Dumper:inttable does, instead of failing on t.entries[1].
+  local entries = "NULL"
+  if #t.entries > 0 then
+    entries = self.linktab:addr(t.entries[1].ptr)
+  end
+  return string.format(
+      "UPB_STRTABLE_INIT(%d, %d, %d, %d, %s)",
+      t.count, t.mask, t.type, t.size_lg2, entries)
+end
+
+-- Dumps an initializer for the given inttable.  Its hash entries and array
+-- entries must have previously been added to the linktable.
+function Dumper:inttable(t)
+  local lt = assert(self.linktab)
+  -- A table with no hash entries gets a NULL entries pointer.
+  local entries
+  if #t.entries == 0 then
+    entries = "NULL"
+  else
+    entries = lt:addr(t.entries[1].ptr)
+  end
+  local array = lt:addr(t.array[1].ptr)
+  -- UPB_INTTABLE_INIT(count, mask, type, size_lg2, ent, a, asize, acount)
+  local fmt = "UPB_INTTABLE_INIT(%d, %d, %d, %d, %s, %s, %d, %d)"
+  return fmt:format(t.count, t.mask, t.type, t.size_lg2, entries,
+                    array, t.array_size, t.array_count)
+end
+
+-- Returns the tables of a def as {int = <inttable>, str = <strtable>}: the
+-- itof/ntof tables of a msgdef or the iton/ntoi tables of an enumdef.
+-- Returns nothing for any other kind of def.  Used first to count entries
+-- and later to dump them.
+local function gettables(def)
+  local deftype = def:def_type()
+  if deftype == upb.DEF_MSG then
+    return {int = upbtable.msgdef_itof(def), str = upbtable.msgdef_ntof(def)}
+  end
+  if deftype == upb.DEF_ENUM then
+    return {int = upbtable.enumdef_iton(def), str = upbtable.enumdef_ntoi(def)}
+  end
+end
+
+-- Emits, through append(), the "generated file, do not edit" banner that
+-- goes at the top of every generated file.
+local function emit_file_warning(append)
+  local banner = {
+    '// This file was generated by upbc (the upb compiler).\n',
+    '// Do not edit -- your changes will be discarded when the file is\n',
+    '// regenerated.\n\n',
+  }
+  for _, line in ipairs(banner) do
+    append(line)
+  end
+end
+
+--[[
+
+  Top-level, exported dumper functions
+
+--]]
+
+-- Writes the generated C source for every def in "symtab" through append().
+--
+-- The output consists of forward declarations followed by one initialized
+-- array per object kind (msgdefs, fielddefs, enumdefs, strtable entries,
+-- inttable entries and inttable array values).  All symbols are prefixed
+-- with "basename".  Returns the LinkTable built along the way so that the
+-- header dumper can refer to the same symbols.
+local function dump_defs_c(symtab, basename, append)
+  -- Add fielddefs for any msgdefs passed in.
+  local fielddefs = {}
+  for _, def in ipairs(symtab:getdefs(upb.DEF_MSG)) do
+    for field in def:fields() do
+      fielddefs[#fielddefs + 1] = field
+    end
+  end
+
+  -- Get a list of all defs and add fielddefs to it.
+  local defs = symtab:getdefs(upb.DEF_ANY)
+  for _, fielddef in ipairs(fielddefs) do
+    defs[#defs + 1] = fielddef
+  end
+
+  -- Sort all defs by (type, name).
+  -- This gives us a linear ordering that we can use to create offsets into
+  -- shared arrays like REFTABLES, hash table entries, and arrays.
+  table.sort(defs, function(a, b)
+    if a:def_type() ~= b:def_type() then
+      return a:def_type() < b:def_type()
+    else
+      return a:full_name() < b:full_name() end
+    end
+  )
+
+  -- Perform pre-pass to build the link table.
+  -- The values below become the suffix of each array's C symbol name.
+  local linktab = LinkTable:new(basename, {
+    [upb.DEF_MSG] = "msgs",
+    [upb.DEF_FIELD] = "fields",
+    [upb.DEF_ENUM] = "enums",
+    intentries = "intentries",
+    strentries = "strentries",
+    arrays = "arrays",
+  })
+  for _, def in ipairs(defs) do
+    assert(def:is_frozen(), "can only dump frozen defs.")
+    linktab:add(def:def_type(), def)
+    -- Only msgdefs and enumdefs have tables; gettables() yields nothing for
+    -- other kinds of def.
+    local tables = gettables(def)
+    if tables then
+      for _, e in ipairs(tables.str.entries) do
+        linktab:add("strentries", e.ptr, e)
+      end
+      for _, e in ipairs(tables.int.entries) do
+        linktab:add("intentries", e.ptr, e)
+      end
+      for _, e in ipairs(tables.int.array) do
+        linktab:add("arrays", e.ptr, e)
+      end
+    end
+  end
+
+  -- Emit forward declarations.
+  emit_file_warning(append)
+  append('#include "upb/def.h"\n\n')
+  append("const upb_msgdef %s;\n", linktab:cdecl(upb.DEF_MSG))
+  append("const upb_fielddef %s;\n", linktab:cdecl(upb.DEF_FIELD))
+  append("const upb_enumdef %s;\n", linktab:cdecl(upb.DEF_ENUM))
+  append("const upb_tabent %s;\n", linktab:cdecl("strentries"))
+  append("const upb_tabent %s;\n", linktab:cdecl("intentries"))
+  append("const upb_value %s;\n", linktab:cdecl("arrays"))
+  append("\n")
+
+  -- Emit defs.
+  local dumper = Dumper:new(linktab)
+
+  append("const upb_msgdef %s = {\n", linktab:cdecl(upb.DEF_MSG))
+  for m in linktab:objs(upb.DEF_MSG) do
+    local tables = gettables(m)
+    -- UPB_MSGDEF_INIT(name, itof, ntof, selector_count)
+    append('  UPB_MSGDEF_INIT("%s", %s, %s, %s),\n',
+           m:full_name(),
+           dumper:inttable(tables.int),
+           dumper:strtable(tables.str),
+           m:_selector_count())
+  end
+  append("};\n\n")
+
+  append("const upb_fielddef %s = {\n", linktab:cdecl(upb.DEF_FIELD))
+  for f in linktab:objs(upb.DEF_FIELD) do
+    local subdef = "NULL"
+    if f:has_subdef() then
+      subdef = string.format("upb_upcast(%s)", linktab:addr(f:subdef()))
+    end
+    -- UPB_FIELDDEF_INIT(label, type, name, num, msgdef, subdef,
+    --                   selector_base, default_value)
+    append('  UPB_FIELDDEF_INIT(%s, %s, "%s", %d, %s, %s, %d, %s),\n',
+           const(f, "label"), const(f, "type"), f:name(),
+           f:number(), linktab:addr(f:msgdef()), subdef,
+           f:_selector_base(),
+           dumper:value(nil) -- TODO
+           )
+  end
+  append("};\n\n")
+
+  append("const upb_enumdef %s = {\n", linktab:cdecl(upb.DEF_ENUM))
+  for e in linktab:objs(upb.DEF_ENUM) do
+    local tables = gettables(e)
+    -- UPB_ENUMDEF_INIT(name, ntoi, iton, defaultval)
+    -- The default value is currently always emitted as 0.
+    append('  UPB_ENUMDEF_INIT("%s", %s, %s, %d),\n',
+           e:full_name(),
+           dumper:strtable(tables.str),
+           dumper:inttable(tables.int),
+           --e:default())
+           0)
+  end
+  append("};\n\n")
+
+  -- Emit the table storage that the def initializers above point into.
+  append("const upb_tabent %s = {\n", linktab:cdecl("strentries"))
+  for ent in linktab:objs("strentries") do
+    append(dumper:tabent(ent))
+  end
+  append("};\n\n");
+
+  append("const upb_tabent %s = {\n", linktab:cdecl("intentries"))
+  for ent in linktab:objs("intentries") do
+    append(dumper:tabent(ent))
+  end
+  append("};\n\n");
+
+  append("const upb_value %s = {\n", linktab:cdecl("arrays"))
+  for ent in linktab:objs("arrays") do
+    append(dumper:arrayval(ent))
+  end
+  append("};\n\n");
+
+  return linktab
+end
+
+-- Joins all arguments into one string, separated by ".".
+local function join(...)
+  local parts = {...}
+  return table.concat(parts, ".")
+end
+
+-- Joins the arguments with "." and then replaces every "." with "_", so that
+-- a dotted name can be used as a C identifier (e.g. "foo.Bar" -> "foo_Bar").
+local function to_cident(...)
+  -- The parentheses keep only the resulting string: string.gsub also returns
+  -- the number of substitutions, which would otherwise leak to callers as a
+  -- second return value.
+  return (string.gsub(join(...), "%.", "_"))
+end
+
+-- Like to_cident(), but upper-cased, as used for preprocessor-style names.
+local function to_preproc(...)
+  local ident = to_cident(...)
+  return string.upper(ident)
+end
+
+-- Writes the generated C header for the defs in "symtab" through append().
+--
+-- The header contains an include guard derived from "basename", a C enum for
+-- every proto enum in symtab, extern declarations of the def arrays (sized
+-- from "linktab", the LinkTable returned by dump_defs_c) and one #define per
+-- msgdef that expands to the address of its array element.
+local function dump_defs_h(symtab, basename, append, linktab)
+  local ucase_basename = string.upper(basename)
+  emit_file_warning(append)
+  append('#ifndef %s_UPB_H_\n', ucase_basename)
+  append('#define %s_UPB_H_\n\n', ucase_basename)
+  append('#include "upb/def.h"\n\n')
+  append('#ifdef __cplusplus\n')
+  append('extern "C" {\n')
+  append('#endif\n\n')
+
+  -- Dump C enums for proto enums.
+  append("// Enums\n\n")
+  for _, def in ipairs(symtab:getdefs(upb.DEF_ENUM)) do
+    local cident = to_cident(def:full_name())
+    append('typedef enum {\n')
+    for k, v in def:values() do
+      append('  %s = %d,\n', to_preproc(cident, k), v)
+    end
+    append('} %s;\n\n', cident)
+  end
+
+  -- Dump macros for referring to specific defs.
+  append("// Do not refer to these forward declarations; use the constants\n")
+  append("// below.\n")
+  append("extern const upb_msgdef %s;\n", linktab:cdecl(upb.DEF_MSG))
+  append("extern const upb_fielddef %s;\n", linktab:cdecl(upb.DEF_FIELD))
+  append("extern const upb_enumdef %s;\n\n", linktab:cdecl(upb.DEF_ENUM))
+  append("// Constants for references to defs.\n")
+  append("// We hide these behind macros to decouple users from the\n")
+  append("// details of how we have statically defined them (ie. whether\n")
+  append("// each def has its own symbol or lives in an array of defs).\n")
+  for def in linktab:objs(upb.DEF_MSG) do
+    append("#define %s %s\n", to_preproc(def:full_name()), linktab:addr(def))
+  end
+  append("\n")
+
+  append('#ifdef __cplusplus\n')
+  -- Close the extern "C" block with a bare "}": a trailing ";" would add a
+  -- stray empty declaration to the generated header.
+  append('}  // extern "C"\n')
+  append('#endif\n\n')
+  append('#endif  // %s_UPB_H_\n', ucase_basename)
+end
+
+-- Dumps all defs of "symtab": the C source goes through append_c and the
+-- matching header through append_h.  The source is generated first because
+-- the header needs the link table that step produces.
+function export.dump_defs(symtab, basename, append_h, append_c)
+  dump_defs_h(symtab, basename, append_h,
+              dump_defs_c(symtab, basename, append_c))
+end
+
+return export
--- a/tools/test_cinit.lua
+++ b/tools/test_cinit.lua
@ -0,0 +1,78 @@
+--[[
+
+  upb - a minimalist implementation of protocol buffers.
+
+  Copyright (c) 2012 Google Inc.  See LICENSE for details.
+  Author: Josh Haberman <jhaberman@gmail.com>
+
+  Tests for dump_cinit.lua.  Runs first in a mode that generates
+  some C code for an extension.  The C code is compiled and then
+  loaded by a second invocation of the test which checks that the
+  generated defs are as expected.
+
+--]]
+
+local dump_cinit = require "dump_cinit"
+local upb = require "upb"
+
+-- Once APIs for loading descriptors are fleshed out, we should replace this
+-- with a descriptor for a meaty protobuf like descriptor.proto.
+--
+-- Fixture shared by both modes: one enum and one message.  The "test" mode
+-- below checks the generated static defs against these names and values.
+local symtab = upb.SymbolTable{
+  upb.EnumDef{full_name = "MyEnum",
+    values = {
+      {"FOO", 1},
+      {"BAR", 77}
+    }
+  },
+  upb.MessageDef{full_name = "MyMessage",
+    fields = {
+      upb.FieldDef{label = upb.LABEL_REQUIRED, name = "field1", number = 1,
+                   type = upb.TYPE_INT32},
+      -- field2 names MyEnum as its subdef; the "test" mode asserts that it
+      -- resolves to that enum def.
+      upb.FieldDef{label = upb.LABEL_REPEATED, name = "field2", number = 2,
+                   type = upb.TYPE_ENUM, subdef_name = ".MyEnum"},
+      -- field3 names the enclosing message itself as its subdef.
+      upb.FieldDef{name = "field3", number = 3, type = upb.TYPE_MESSAGE,
+                   subdef_name = ".MyMessage"}
+    }
+  }
+}
+
+-- The script runs in one of two modes, selected by arg[1]:
+--   generate <file>: writes C source to <file> and a header to <file>.h.
+--   test:            loads the compiled "staticdefs" module and checks that
+--                    the defs it exposes match the fixture above.
+if arg[1] == "generate" then
+  local cfilename = assert(arg[2], "generate mode requires an output filename")
+  local f = assert(io.open(cfilename, "w"))
+  local f_h = assert(io.open(cfilename .. ".h", "w"))
+  local appendc = dump_cinit.file_appender(f)
+  local appendh = dump_cinit.file_appender(f_h)
+  f:write('#include "lua.h"\n')
+  f:write('#define ELEMENTS(array) (sizeof(array)/sizeof(*array))\n')
+  f:write('#include "bindings/lua/upb.h"\n')
+  dump_cinit.dump_defs(symtab, "test", appendh, appendc)
+  -- Module entry point: returns a Lua array holding every message def
+  -- followed by every enum def.
+  f:write([[int luaopen_staticdefs(lua_State *L) {
+    lua_newtable(L);
+    for (int i = 0; i < ELEMENTS(test_msgs); i++) {
+      lupb_def_pushnewrapper(L, upb_upcast(&test_msgs[i]), NULL);
+      lua_rawseti(L, -2, i + 1);
+    }
+    for (int i = 0; i < ELEMENTS(test_enums); i++) {
+      lupb_def_pushnewrapper(L, upb_upcast(&test_enums[i]), NULL);
+      lua_rawseti(L, -2, ELEMENTS(test_msgs) + i + 1);
+    }
+    return 1;
+  }]])
+  f_h:close()
+  f:close()
+elseif arg[1] == "test" then
+  local staticdefs = require "staticdefs"
+
+  -- Messages come first in the array, then enums (see luaopen_staticdefs).
+  local msg = assert(staticdefs[1])
+  local enum = assert(staticdefs[2])
+  local f2 = assert(msg:field("field2"))
+  assert(msg:def_type() == upb.DEF_MSG)
+  assert(msg:full_name() == "MyMessage")
+  assert(enum:def_type() == upb.DEF_ENUM)
+  assert(enum:full_name() == "MyEnum")
+  assert(enum:value("FOO") == 1)
+  assert(f2:name() == "field2")
+  assert(f2:msgdef() == msg)
+  assert(f2:subdef() == enum)
+else
+  -- tostring() keeps the message intact when no mode was given at all
+  -- (concatenating a nil arg[1] would raise a different, unrelated error).
+  error("Unknown operation " .. tostring(arg[1]))
+end
--- a/tools/upbc.c
+++ b/tools/upbc.c
@ -1,197 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Google Inc.  See LICENSE for details.
- * Author: Josh Haberman <jhaberman@gmail.com>
- *
- * upbc is the upb compiler, which at the moment simply takes a
- * protocol descriptor and outputs a header file containing the
- * names and types of the fields.
- */
-
-#include <ctype.h>
-#include <inttypes.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include "upb/bytestream.h"
-#include "upb/def.h"
-#include "upb/msg.h"
-#include "upb/pb/glue.h"
-
-/* These are in-place string transformations that do not change the length of
- * the string (and thus never need to re-allocate). */
-
-// Convert to C identifier: foo.bar.Baz -> foo_bar_Baz.
-static void to_cident(char *str) {
-  for (; *str; ++str) {
-    if(*str == '.' || *str == '/') *str = '_';
-  }
-}
-
-// Convert to C proprocessor identifier: foo.bar.Baz -> FOO_BAR_BAZ.
-static void to_preproc(char *str) {
-  to_cident(str);
-  for (; *str; ++str) *str = toupper(*str);
-}
-
-/* The _const.h file defines the constants (enums) defined in the .proto
- * file. */
-static void write_const_h(const upb_def *defs[], int num_entries,
-                          char *outfile_name, FILE *stream) {
-  /* Header file prologue. */
-  char *include_guard_name = strdup(outfile_name);
-  to_preproc(include_guard_name);
-
-  fputs("/* This file was generated by upbc (the upb compiler).  "
-        "Do not edit. */\n\n", stream),
-  fprintf(stream, "#ifndef %s\n", include_guard_name);
-  fprintf(stream, "#define %s\n\n", include_guard_name);
-  fputs("#ifdef __cplusplus\n", stream);
-  fputs("extern \"C\" {\n", stream);
-  fputs("#endif\n\n", stream);
-
-  /* Enums. */
-  fprintf(stream, "/* Enums. */\n\n");
-  for(int i = 0; i < num_entries; i++) {  /* Foreach enum */
-    if(defs[i]->type != UPB_DEF_ENUM) continue;
-    const upb_enumdef *enumdef = upb_downcast_enumdef_const(defs[i]);
-    char *enum_name = strdup(upb_def_fullname(UPB_UPCAST(enumdef)));
-    char *enum_val_prefix = strdup(enum_name);
-    to_cident(enum_name);
-    to_preproc(enum_val_prefix);
-
-    fprintf(stream, "typedef enum %s {\n", enum_name);
-    bool first = true;
-    /* Foreach enum value. */
-    upb_enum_iter iter;
-    for (upb_enum_begin(&iter, enumdef);
-         !upb_enum_done(&iter);
-         upb_enum_next(&iter)) {
-      char *value_name = strdup(upb_enum_iter_name(&iter));
-      uint32_t value = upb_enum_iter_number(&iter);
-      to_preproc(value_name);
-      /* "  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13," */
-      if (!first) fputs(",\n", stream);
-      first = false;
-      fprintf(stream, "  %s_%s = %" PRIu32, enum_val_prefix, value_name, value);
-      free(value_name);
-    }
-    fprintf(stream, "\n} %s;\n\n", enum_name);
-    free(enum_name);
-    free(enum_val_prefix);
-  }
-
-  /* Constants for field names and numbers. */
-  fprintf(stream, "/* Constants for field names and numbers. */\n\n");
-  for(int i = 0; i < num_entries; i++) {  /* Foreach enum */
-    const upb_msgdef *m = upb_dyncast_msgdef_const(defs[i]);
-    if(!m) continue;
-    char *msg_name = strdup(upb_def_fullname(UPB_UPCAST(m)));
-    char *msg_val_prefix = strdup(msg_name);
-    to_preproc(msg_val_prefix);
-    upb_msg_iter i;
-    for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
-      upb_fielddef *f = upb_msg_iter_field(&i);
-      char *preproc_field_name = strdup(upb_fielddef_name(f));
-      to_preproc(preproc_field_name);
-      fprintf(stream, "#define %s_%s__FIELDNUM %d\n",
-              msg_val_prefix, preproc_field_name, upb_fielddef_number(f));
-      fprintf(stream, "#define %s_%s__FIELDNAME \"%s\"\n",
-              msg_val_prefix, preproc_field_name, upb_fielddef_name(f));
-      fprintf(stream, "#define %s_%s__FIELDTYPE %d\n\n",
-              msg_val_prefix, preproc_field_name, upb_fielddef_type(f));
-      free(preproc_field_name);
-    }
-    free(msg_val_prefix);
-    free(msg_name);
-  }
-
-  /* Epilogue. */
-  fputs("#ifdef __cplusplus\n", stream);
-  fputs("}  /* extern \"C\" */\n", stream);
-  fputs("#endif\n\n", stream);
-  fprintf(stream, "#endif  /* %s */\n", include_guard_name);
-  free(include_guard_name);
-}
-
-const char usage[] =
-  "upbc -- upb compiler.\n"
-  "upb v0.1  http://blog.reverberate.org/upb/\n"
-  "\n"
-  "Usage: upbc [options] descriptor-file\n"
-  "\n"
-  "  -o OUTFILE-BASE    Write to OUTFILE-BASE.h and OUTFILE-BASE.c instead\n"
-  "                     of using the input file as a basename.\n"
-;
-
-void usage_err(const char *err) {
-  fprintf(stderr, "upbc: %s\n\n", err);
-  fputs(usage, stderr);
-  exit(1);
-}
-
-void error(const char *err, ...) {
-  va_list args;
-  va_start(args, err);
-  fprintf(stderr, "upbc: ");
-  vfprintf(stderr, err, args);
-  va_end(args);
-  exit(1);
-}
-
-int main(int argc, char *argv[]) {
-  /* Parse arguments. */
-  char *outfile_base = NULL, *input_file = NULL;
-  for(int i = 1; i < argc; i++) {
-    if(strcmp(argv[i], "-o") == 0) {
-      if(++i == argc)
-        usage_err("-o must be followed by a FILE-BASE.");
-      else if(outfile_base)
-        usage_err("-o was specified multiple times.");
-      outfile_base = argv[i];
-    } else {
-      if(input_file)
-        usage_err("You can only specify one input file.");
-      input_file = argv[i];
-    }
-  }
-  if(!input_file) usage_err("You must specify an input file.");
-  if(!outfile_base) outfile_base = input_file;
-
-  // Read and parse input file.
-  size_t len;
-  char *descriptor = upb_readfile(input_file, &len);
-  if(!descriptor)
-    error("Couldn't read input file.");
-
-  // TODO: make upb_parsedesc use a separate symtab, so we can use it here when
-  // importing descriptor.proto.
-  upb_symtab *s = upb_symtab_new();
-  upb_status status = UPB_STATUS_INIT;
-  upb_load_descriptor_into_symtab(s, descriptor, len, &status);
-  if(!upb_ok(&status)) {
-    error("Failed to parse input file descriptor: %s\n",
-          upb_status_getstr(&status));
-  }
-  upb_status_uninit(&status);
-
-  /* Emit output files. */
-  char h_const_filename[256];
-  const int maxsize = sizeof(h_const_filename);
-  if(snprintf(h_const_filename, maxsize, "%s_const.h", outfile_base) >= maxsize)
-    error("File base too long.\n");
-
-  FILE *h_const_file = fopen(h_const_filename, "w");
-  if(!h_const_file) error("Failed to open _const.h output file\n");
-
-  int symcount;
-  const upb_def **defs = upb_symtab_getdefs(s, &symcount, UPB_DEF_ANY, &defs);
-  write_const_h(defs, symcount, h_const_filename, h_const_file);
-  for (int i = 0; i < symcount; i++) upb_def_unref(defs[i], &defs);
-  free(defs);
-  free(descriptor);
-  upb_symtab_unref(s);
-  fclose(h_const_file);
-
-  return 0;
-}
--- a/tools/upbc.lua
+++ b/tools/upbc.lua
@ -0,0 +1,50 @@
+--[[
+
+  upb - a minimalist implementation of protocol buffers.
+
+  Copyright (c) 2012 Google Inc.  See LICENSE for details.
+  Author: Josh Haberman <jhaberman@gmail.com>
+
+  The upb compiler.  Unlike the proto2 compiler, this does
+  not output any parsing code or generated classes or anything
+  specific to the protobuf binary format at all.  At the moment
+  it only dumps C initializers for upb_defs, so that a .proto
+  file can be represented in a .o file.
+
+--]]
+
+local dump_cinit = require "dump_cinit"
+local upb = require "upb"
+
+local src = arg[1]
+local outbase = arg[2]
+local basename = arg[3]
+
+-- All three arguments are required.  Fail with a usage message instead of an
+-- opaque "attempt to concatenate a nil value" error further down.
+if not (src and outbase and basename) then
+  io.stderr:write(string.format(
+      "Usage: %s <descriptor-file> <output-base> <basename>\n",
+      arg[0] or "upbc.lua"))
+  os.exit(1)
+end
+
+local hfilename = outbase .. ".upb.h"
+local cfilename = outbase .. ".upb.c"
+
+if os.getenv("UPBC_VERBOSE") then
+  print("upbc:")
+  print(string.format("  source file=%s", src))
+  print(string.format("  output file base=%s", outbase))
+  print(string.format("  hfilename=%s", hfilename))
+  print(string.format("  cfilename=%s", cfilename))
+end
+
+-- Quotes "s" so that a POSIX shell treats it as a single literal word.
+local function shell_quote(s)
+  return "'" .. string.gsub(s, "'", "'\\''") .. "'"
+end
+
+-- Open input/output files.  The descriptor is read in binary mode so that
+-- platforms which distinguish text mode cannot alter its bytes.
+local f = assert(io.open(src, "rb"), "couldn't open input file " .. src)
+local descriptor = assert(f:read("*all"), "couldn't read input file " .. src)
+f:close()
+local symtab = upb.SymbolTable()
+symtab:load_descriptor(descriptor)
+
+-- Make sure the output directory exists.  outbase is quoted so that spaces
+-- or shell metacharacters in the path are not interpreted by the shell.
+os.execute(string.format('mkdir -p "$(dirname %s)"', shell_quote(outbase)))
+local hfile = assert(io.open(hfilename, "w"), "couldn't open " .. hfilename)
+local cfile = assert(io.open(cfilename, "w"), "couldn't open " .. cfilename)
+
+local happend = dump_cinit.file_appender(hfile)
+local cappend = dump_cinit.file_appender(cfile)
+
+-- Dump defs
+dump_cinit.dump_defs(symtab, basename, happend, cappend)
+
+hfile:close()
+cfile:close()
--- a/upb/bytestream.c
+++ b/upb/bytestream.c
@ -7,11 +7,13 @@

 #include "upb/bytestream.h"

-#include <stddef.h>
 #include <stdlib.h>
 #include <string.h>

-char *upb_byteregion_strdup(const struct _upb_byteregion *r) {
+
+/* upb_byteregion *************************************************************/
+
+char *upb_byteregion_strdup(const upb_byteregion *r) {
  char *ret = malloc(upb_byteregion_len(r) + 1);
  upb_byteregion_copyall(r, ret);
  ret[upb_byteregion_len(r)] = '\0';
--- a/upb/bytestream.h
+++ b/upb/bytestream.h
@ -73,16 +73,18 @@
 #ifndef UPB_BYTESTREAM_H
 #define UPB_BYTESTREAM_H

-#include <errno.h>
-#include <stdarg.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
 #include "upb.h"

 #ifdef __cplusplus
+// In C++ the byteregion and stringsrc types are classes in namespace upb, and
+// upb_stringsrc is an alias for upb::StringSource.  In C, upb_stringsrc is a
+// plain struct.  Both forms are only forward-declared here; the definitions
+// appear further down in this header.
+namespace upb {
+class ByteRegion;
+class StringSource;
+}  // namespace upb
+typedef upb::StringSource upb_stringsrc;
 extern "C" {
+#else
+struct upb_stringsrc;
+typedef struct upb_stringsrc upb_stringsrc;
 #endif

 typedef enum {
@ -185,22 +187,91 @@ INLINE const char *upb_bytesrc_getptr(const upb_bytesrc *src, uint64_t ofs,

 #define UPB_NONDELIMITED (0xffffffffffffffffULL)

-typedef struct _upb_byteregion {
+#ifdef __cplusplus
+}  // extern "C"
+
+// C++ view of a byteregion.  The class and the C struct upb_byteregion share
+// the data members declared after the #else/#endif below; each method is an
+// inline wrapper around the corresponding upb_byteregion_*() C function.
+class upb::ByteRegion {
+ public:
+  // Value reported as the end/length of a non-delimited byteregion.
+  static const uint64_t kNondelimited = UPB_NONDELIMITED;
+  typedef upb_bytesuccess_t ByteSuccess;
+
+  // Accessors for the region's bounds -- the meaning of these is described in
+  // the diagram above.
+  uint64_t start_ofs() const;
+  uint64_t discard_ofs() const;
+  uint64_t fetch_ofs() const;
+  uint64_t end_ofs() const;
+
+  // Returns how many bytes are fetched and available for reading starting from
+  // offset "offset".
+  uint64_t BytesAvailable(uint64_t offset) const;
+
+  // Returns the total number of bytes remaining after offset "offset", or
+  // kNondelimited if the byteregion is non-delimited.
+  uint64_t BytesRemaining(uint64_t offset) const;
+
+  // Returns the number of bytes remaining from start_ofs(), ie. the length of
+  // the whole region, or kNondelimited if the byteregion is non-delimited.
+  uint64_t Length() const;
+
+  // Sets the value of this byteregion to be a subset of the given byteregion's
+  // data.  The caller is responsible for releasing this region before the src
+  // region is released (unless the region is first pinned, if pinning support
+  // is added.  see below).
+  void Reset(const upb_byteregion *src, uint64_t ofs, uint64_t len);
+  void Release();
+
+  // Attempts to fetch more data, extending the fetched range of this
+  // byteregion.  Returns a ByteSuccess status rather than a bool; presumably
+  // UPB_BYTE_OK means the fetched range grew and any other value signals EOF
+  // or an error -- TODO confirm against upb_byteregion_fetch().
+  ByteSuccess Fetch();
+
+  // Fetches all remaining data.  Returns UPB_BYTE_OK once the whole region has
+  // been fetched (hitting EOF counts as success); any other status is the
+  // failure reported by the underlying fetch.  May only be used on delimited
+  // byteregions.
+  ByteSuccess FetchAll();
+
+  // Discards bytes from the byteregion up until ofs (which must be greater or
+  // equal to discard_ofs()).  It is valid to discard bytes that have not been
+  // fetched (such bytes will never be fetched) but it is an error to discard
+  // past the end of a delimited byteregion.
+  void Discard(uint64_t ofs);
+
+  // Copies "len" bytes of data into "dst", starting at ofs.  The specified
+  // region must be available.
+  void Copy(uint64_t ofs, size_t len, char *dst) const;
+
+  // Copies all bytes from the byteregion into dst.  Requires that the entire
+  // byteregion is fetched and that none has been discarded.
+  void CopyAll(char *dst) const;
+
+  // Returns a pointer to the internal buffer for the byteregion starting at
+  // offset "ofs." Stores the number of bytes available in this buffer in *len.
+  // The returned buffer is invalidated when the byteregion is reset or
+  // released, or when the bytes are discarded.  If the byteregion is not
+  // currently pinned, the pointer is only valid for the lifetime of the parent
+  // byteregion.
+  const char *GetPtr(uint64_t ofs, size_t *len) const;
+
+  // Copies the contents of the byteregion into a newly-allocated,
+  // NULL-terminated string.  Requires that the byteregion is fully fetched.
+  char *StrDup() const;
+
+  // Replaces the contents of *str with the bytes from start_ofs() up to
+  // end_ofs().  T must provide assign(ptr, len) and append(ptr, len)
+  // (eg. std::string).
+  template <typename T> void AssignToString(T* str);
+
+#else
+struct upb_byteregion {
+#endif
  uint64_t start;
  uint64_t discard;
  uint64_t fetch;
  uint64_t end;         // UPB_NONDELIMITED if nondelimited.
  upb_bytesrc *bytesrc;
  bool toplevel;        // If true, discards hit the underlying bytesrc.
-} upb_byteregion;
+};

-// Initializes a byteregion.  Its initial value will be empty.  No methods may
-// be called on an empty byteregion except upb_byteregion_reset().
-void upb_byteregion_init(upb_byteregion *r);
-void upb_byteregion_uninit(upb_byteregion *r);
+#ifdef __cplusplus
+extern "C" {
+#endif

-// Accessors for the regions bounds -- the meaning of these is described in the
-// diagram above.
+// Native C API.
 INLINE uint64_t upb_byteregion_startofs(const upb_byteregion *r) {
  return r->start;
 }
@ -213,17 +284,11 @@ INLINE uint64_t upb_byteregion_fetchofs(const upb_byteregion *r) {
 INLINE uint64_t upb_byteregion_endofs(const upb_byteregion *r) {
  return r->end;
 }
-
-// Returns how many bytes are fetched and available for reading starting
-// from offset "o".
 INLINE uint64_t upb_byteregion_available(const upb_byteregion *r, uint64_t o) {
  assert(o >= upb_byteregion_discardofs(r));
  assert(o <= r->fetch);  // Could relax this.
  return r->fetch - o;
 }
-
-// Returns the total number of bytes remaining after offset "o", or
-// UPB_NONDELIMITED if the byteregion is non-delimited.
 INLINE uint64_t upb_byteregion_remaining(const upb_byteregion *r, uint64_t o) {
  return r->end == UPB_NONDELIMITED ? UPB_NONDELIMITED : r->end - o;
 }
@ -231,22 +296,10 @@ INLINE uint64_t upb_byteregion_remaining(const upb_byteregion *r, uint64_t o) {
 INLINE uint64_t upb_byteregion_len(const upb_byteregion *r) {
  return upb_byteregion_remaining(r, r->start);
 }
-
-// Sets the value of this byteregion to be a subset of the given byteregion's
-// data.  The caller is responsible for releasing this region before the src
-// region is released (unless the region is first pinned, if pinning support is
-// added.  see below).
 void upb_byteregion_reset(upb_byteregion *r, const upb_byteregion *src,
                          uint64_t ofs, uint64_t len);
 void upb_byteregion_release(upb_byteregion *r);
-
-// Attempts to fetch more data, extending the fetched range of this byteregion.
-// Returns true if the fetched region was extended by at least one byte, false
-// on EOF or error (see *s for details).
 upb_bytesuccess_t upb_byteregion_fetch(upb_byteregion *r);
-
-// Fetches all remaining data for "r", returning the success of the operation
-// May only be used on delimited byteregions.
 INLINE upb_bytesuccess_t upb_byteregion_fetchall(upb_byteregion *r) {
  assert(upb_byteregion_len(r) != UPB_NONDELIMITED);
  upb_bytesuccess_t ret;
@ -255,11 +308,6 @@ INLINE upb_bytesuccess_t upb_byteregion_fetchall(upb_byteregion *r) {
  } while (ret == UPB_BYTE_OK);
  return ret == UPB_BYTE_EOF ? UPB_BYTE_OK : ret;
 }
-
-// Discards bytes from the byteregion up until ofs (which must be greater or
-// equal to upb_byteregion_discardofs()).  It is valid to discard bytes that
-// have not been fetched (such bytes will never be fetched) but it is an error
-// to discard past the end of a delimited byteregion.
 INLINE void upb_byteregion_discard(upb_byteregion *r, uint64_t ofs) {
  assert(ofs >= upb_byteregion_discardofs(r));
  assert(ofs <= upb_byteregion_endofs(r));
@ -267,28 +315,16 @@ INLINE void upb_byteregion_discard(upb_byteregion *r, uint64_t ofs) {
  if (ofs > r->fetch) r->fetch = ofs;
  if (r->toplevel) upb_bytesrc_discard(r->bytesrc, ofs);
 }
-
-// Copies "len" bytes of data into "dst", starting at ofs.  The specified
-// region must be available.
 INLINE void upb_byteregion_copy(const upb_byteregion *r, uint64_t ofs,
                                size_t len, char *dst) {
  assert(ofs >= upb_byteregion_discardofs(r));
  assert(len <= upb_byteregion_available(r, ofs));
  upb_bytesrc_copy(r->bytesrc, ofs, len, dst);
 }
-
-// Copies all bytes from the byteregion into dst.  Requires that the entire
-// byteregion is fetched and that none has been discarded.
 INLINE void upb_byteregion_copyall(const upb_byteregion *r, char *dst) {
  assert(r->start == r->discard && r->end == r->fetch);
  upb_byteregion_copy(r, r->start, upb_byteregion_len(r), dst);
 }
-
-// Returns a pointer to the internal buffer for the byteregion starting at
-// offset "ofs." Stores the number of bytes available in this buffer in *len.
-// The returned buffer is invalidated when the byteregion is reset or released,
-// or when the bytes are discarded.  If the byteregion is not currently pinned,
-// the pointer is only valid for the lifetime of the parent byteregion.
 INLINE const char *upb_byteregion_getptr(const upb_byteregion *r,
                                         uint64_t ofs, size_t *len) {
  assert(ofs >= upb_byteregion_discardofs(r));
@ -354,9 +390,7 @@ INLINE int upb_bytesink_write(upb_bytesink *s, const void *buf, int len) {
  return s->vtbl->write(s, buf, len);
 }

-INLINE int upb_bytesink_writestr(upb_bytesink *sink, const char *str) {
-  return upb_bytesink_write(sink, str, strlen(str));
-}
+// Writes the NUL-terminated string "buf" to bytesink "s".  This is a macro, so
+// "buf" is evaluated twice (avoid arguments with side effects), and strlen()
+// must be declared where the macro is used: this header does not include
+// <string.h> directly.
+#define upb_bytesink_writestr(s, buf) upb_bytesink_write(s, buf, strlen(buf))

 // Returns the number of bytes written or -1 on error.
 INLINE int upb_bytesink_printf(upb_bytesink *sink, const char *fmt, ...) {
@ -413,27 +447,47 @@ INLINE void upb_bytesink_rewind(upb_bytesink *sink, uint64_t offset) {

 // bytesrc/bytesink for a simple contiguous string.

-typedef struct {
+#ifdef __cplusplus
+}  // extern "C"
+
+// C++ view of a stringsrc.  The class and the C struct upb_stringsrc share the
+// data members declared after the #else/#endif below.
+class upb::StringSource {
+ public:
+  // Initializes the stringsrc without giving it a string to vend.
+  StringSource();
+  // Initializes the stringsrc and resets it to vend "str".  T must provide
+  // c_str() and size() (eg. std::string).
+  template <typename T> explicit StringSource(const T& str);
+  // Initializes the stringsrc and resets it to vend "len" bytes at "data".
+  StringSource(const char *data, size_t len);
+  ~StringSource();
+
+  // Resets the stringsrc to a state where it will vend the given string.  The
+  // string data must be valid until the stringsrc is reset again or destroyed.
+  void Reset(const char* data, size_t len);
+  template <typename T> void Reset(const T& str);
+
+  // Returns the top-level upb_byteregion* for this stringsrc.  Invalidated
+  // when the stringsrc is reset.
+  ByteRegion* AllBytes();
+
+  // Returns the upb_bytesrc embedded in this stringsrc.
+  upb_bytesrc* ByteSource();
+
+#else
+struct upb_stringsrc {
+#endif
  upb_bytesrc bytesrc;
  const char *str;
  size_t len;
  upb_byteregion byteregion;
-} upb_stringsrc;
+};

-// Create/free a stringsrc.
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Native C API.
 void upb_stringsrc_init(upb_stringsrc *s);
 void upb_stringsrc_uninit(upb_stringsrc *s);
-
-// Resets the stringsrc to a state where it will vend the given string.  The
-// string data must be valid until the stringsrc is reset again or destroyed.
 void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len);
-
 INLINE upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) {
  return &s->bytesrc;
 }
-
-// Returns the top-level upb_byteregion* for this stringsrc.  Invalidated when
-// the stringsrc is reset.
 INLINE upb_byteregion *upb_stringsrc_allbytes(upb_stringsrc *s) {
  return &s->byteregion;
 }
@ -465,7 +519,111 @@ const char *upb_stringsink_release(upb_stringsink *s, size_t *len);
 upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s);

 #ifdef __cplusplus
-}  /* extern "C" */
+}  // extern "C"
+
+namespace upb {
+
+// ByteRegion method definitions.  Each one forwards to the upb_byteregion_*()
+// C function of the same purpose, passing "this" as the byteregion.
+
+// Bounds accessors.
+inline uint64_t ByteRegion::start_ofs() const {
+  return upb_byteregion_startofs(this);
+}
+inline uint64_t ByteRegion::discard_ofs() const {
+  return upb_byteregion_discardofs(this);
+}
+inline uint64_t ByteRegion::fetch_ofs() const {
+  return upb_byteregion_fetchofs(this);
+}
+inline uint64_t ByteRegion::end_ofs() const {
+  return upb_byteregion_endofs(this);
+}
+
+// Size queries.
+inline uint64_t ByteRegion::BytesAvailable(uint64_t offset) const {
+  return upb_byteregion_available(this, offset);
+}
+inline uint64_t ByteRegion::BytesRemaining(uint64_t offset) const {
+  return upb_byteregion_remaining(this, offset);
+}
+inline uint64_t ByteRegion::Length() const {
+  return upb_byteregion_len(this);
+}
+
+// Lifetime, fetching and discarding.
+inline void ByteRegion::Reset(
+    const upb_byteregion *src, uint64_t ofs, uint64_t len) {
+  upb_byteregion_reset(this, src, ofs, len);
+}
+inline void ByteRegion::Release() {
+  upb_byteregion_release(this);
+}
+inline ByteRegion::ByteSuccess ByteRegion::Fetch() {
+  return upb_byteregion_fetch(this);
+}
+inline ByteRegion::ByteSuccess ByteRegion::FetchAll() {
+  return upb_byteregion_fetchall(this);
+}
+inline void ByteRegion::Discard(uint64_t ofs) {
+  upb_byteregion_discard(this, ofs);
+}
+
+// Data access.
+inline void ByteRegion::Copy(uint64_t ofs, size_t len, char *dst) const {
+  upb_byteregion_copy(this, ofs, len, dst);
+}
+inline void ByteRegion::CopyAll(char *dst) const {
+  upb_byteregion_copyall(this, dst);
+}
+inline const char *ByteRegion::GetPtr(uint64_t ofs, size_t *len) const {
+  return upb_byteregion_getptr(this, ofs, len);
+}
+inline char *ByteRegion::StrDup() const {
+  return upb_byteregion_strdup(this);
+}
+// Replaces the contents of *str with the bytes of this byteregion, walking
+// from start_ofs() to end_ofs() one GetPtr() buffer at a time.
+// NOTE(review): the loop only advances by the length GetPtr() reports, so
+// this presumably requires a delimited, fully-fetched region -- confirm.
+template <typename T> void ByteRegion::AssignToString(T* str) {
+  size_t avail;
+  uint64_t pos = start_ofs();
+  const char *chunk = GetPtr(pos, &avail);
+  // Empirically calling reserve() here is counterproductive and slows down
+  // benchmarks.  If the parsing is happening in a tight loop that is reusing
+  // the string object, there is probably enough data reserved already and
+  // the reserve() call is extra overhead.
+  str->assign(chunk, avail);
+  // Append any further buffers until the end of the region is reached.
+  for (pos += avail; pos < end_ofs(); pos += avail) {
+    chunk = GetPtr(pos, &avail);
+    str->append(chunk, avail);
+  }
+}
+
+// Specializations of GetValue/MakeValue for ByteRegion*, so a byteregion
+// pointer can be read from and wrapped into a Value.  They go through the C
+// accessors upb_value_getbyteregion() and upb_value_byteregion().
+template <> inline ByteRegion* GetValue<ByteRegion*>(Value v) {
+  return static_cast<ByteRegion*>(upb_value_getbyteregion(v));
+}
+
+template <> inline Value MakeValue<ByteRegion*>(ByteRegion* v) {
+  return upb_value_byteregion(v);
+}
+
+// StringSource method definitions, implemented on top of the upb_stringsrc_*()
+// C functions.  Every constructor calls upb_stringsrc_init() first; the ones
+// that take a string then Reset() to it.
+inline StringSource::StringSource() { upb_stringsrc_init(this); }
+template <typename T> StringSource::StringSource(const T& str) {
+  upb_stringsrc_init(this);
+  Reset(str);
+}
+inline StringSource::StringSource(const char *data, size_t len) {
+  upb_stringsrc_init(this);
+  Reset(data, len);
+}
+inline StringSource::~StringSource() {
+  upb_stringsrc_uninit(this);
+}
+inline void StringSource::Reset(const char* data, size_t len) {
+  upb_stringsrc_reset(this, data, len);
+}
+// Vends str.c_str() with length str.size(); the pointer is passed through
+// as-is, not copied here.
+template <typename T> void StringSource::Reset(const T& str) {
+  upb_stringsrc_reset(this, str.c_str(), str.size());
+}
+inline ByteRegion* StringSource::AllBytes() {
+  return upb_stringsrc_allbytes(this);
+}
+inline upb_bytesrc* StringSource::ByteSource() {
+  return upb_stringsrc_bytesrc(this);
+}
+
+}  // namespace upb
+
 #endif

 #endif
--- a/upb/def.c
+++ b/upb/def.c
--- a/upb/def.h
+++ b/upb/def.h
--- a/upb/descriptor/descriptor.proto
+++ b/upb/descriptor/descriptor.proto
--- a/upb/descriptor/descriptor.upb.c
+++ b/upb/descriptor/descriptor.upb.c
@ -0,0 +1,483 @@
+// This file was generated by upbc (the upb compiler).
+// Do not edit -- your changes will be discarded when the file is
+// regenerated.
+
+#include "upb/def.h"
+
+const upb_msgdef google_protobuf_msgs[20];
+const upb_fielddef google_protobuf_fields[73];
+const upb_enumdef google_protobuf_enums[4];
+const upb_tabent google_protobuf_strentries[192];
+const upb_tabent google_protobuf_intentries[66];
+const upb_value google_protobuf_arrays[97];
+
+const upb_msgdef google_protobuf_msgs[20] = {
+  UPB_MSGDEF_INIT("google.protobuf.DescriptorProto", UPB_INTTABLE_INIT(2, 3, 9, 2, &google_protobuf_intentries[0], &google_protobuf_arrays[0], 6, 5), UPB_STRTABLE_INIT(7, 15, 9, 4, &google_protobuf_strentries[0]), 31),
+  UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ExtensionRange", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[6], 4, 2), UPB_STRTABLE_INIT(2, 3, 9, 2, &google_protobuf_strentries[16]), 2),
+  UPB_MSGDEF_INIT("google.protobuf.EnumDescriptorProto", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[10], 4, 3), UPB_STRTABLE_INIT(3, 3, 9, 2, &google_protobuf_strentries[20]), 11),
+  UPB_MSGDEF_INIT("google.protobuf.EnumOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[4], &google_protobuf_arrays[14], 1, 0), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[24]), 5),
+  UPB_MSGDEF_INIT("google.protobuf.EnumValueDescriptorProto", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[15], 4, 3), UPB_STRTABLE_INIT(3, 3, 9, 2, &google_protobuf_strentries[28]), 7),
+  UPB_MSGDEF_INIT("google.protobuf.EnumValueOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[6], &google_protobuf_arrays[19], 1, 0), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[32]), 5),
+  UPB_MSGDEF_INIT("google.protobuf.FieldDescriptorProto", UPB_INTTABLE_INIT(3, 3, 9, 2, &google_protobuf_intentries[8], &google_protobuf_arrays[20], 6, 5), UPB_STRTABLE_INIT(8, 15, 9, 4, &google_protobuf_strentries[36]), 18),
+  UPB_MSGDEF_INIT("google.protobuf.FieldOptions", UPB_INTTABLE_INIT(2, 3, 9, 2, &google_protobuf_intentries[12], &google_protobuf_arrays[26], 5, 3), UPB_STRTABLE_INIT(5, 7, 9, 3, &google_protobuf_strentries[52]), 11),
+  UPB_MSGDEF_INIT("google.protobuf.FileDescriptorProto", UPB_INTTABLE_INIT(4, 7, 9, 3, &google_protobuf_intentries[16], &google_protobuf_arrays[31], 6, 5), UPB_STRTABLE_INIT(9, 15, 9, 4, &google_protobuf_strentries[60]), 37),
+  UPB_MSGDEF_INIT("google.protobuf.FileDescriptorSet", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[37], 3, 1), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[76]), 5),
+  UPB_MSGDEF_INIT("google.protobuf.FileOptions", UPB_INTTABLE_INIT(8, 15, 9, 4, &google_protobuf_intentries[24], &google_protobuf_arrays[40], 6, 1), UPB_STRTABLE_INIT(9, 15, 9, 4, &google_protobuf_strentries[80]), 17),
+  UPB_MSGDEF_INIT("google.protobuf.MessageOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[40], &google_protobuf_arrays[46], 4, 2), UPB_STRTABLE_INIT(3, 3, 9, 2, &google_protobuf_strentries[96]), 7),
+  UPB_MSGDEF_INIT("google.protobuf.MethodDescriptorProto", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[50], 5, 4), UPB_STRTABLE_INIT(4, 7, 9, 3, &google_protobuf_strentries[100]), 12),
+  UPB_MSGDEF_INIT("google.protobuf.MethodOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[42], &google_protobuf_arrays[55], 1, 0), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[108]), 5),
+  UPB_MSGDEF_INIT("google.protobuf.ServiceDescriptorProto", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[56], 4, 3), UPB_STRTABLE_INIT(3, 3, 9, 2, &google_protobuf_strentries[112]), 11),
+  UPB_MSGDEF_INIT("google.protobuf.ServiceOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[44], &google_protobuf_arrays[60], 1, 0), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[116]), 5),
+  UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[61], 3, 1), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[120]), 5),
+  UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo.Location", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[64], 4, 2), UPB_STRTABLE_INIT(2, 3, 9, 2, &google_protobuf_strentries[124]), 6),
+  UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption", UPB_INTTABLE_INIT(3, 3, 9, 2, &google_protobuf_intentries[46], &google_protobuf_arrays[68], 6, 4), UPB_STRTABLE_INIT(7, 15, 9, 4, &google_protobuf_strentries[128]), 17),
+  UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption.NamePart", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[74], 4, 2), UPB_STRTABLE_INIT(2, 3, 9, 2, &google_protobuf_strentries[144]), 4),
+};
+
+const upb_fielddef google_protobuf_fields[73] = {
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "aggregate_value", 8, &google_protobuf_msgs[18], NULL, 10, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "cc_generic_services", 16, &google_protobuf_msgs[10], NULL, 3, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, "ctype", 1, &google_protobuf_msgs[7], upb_upcast(&google_protobuf_enums[2]), 0, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "default_value", 7, &google_protobuf_msgs[6], NULL, 15, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, "dependency", 3, &google_protobuf_msgs[8], NULL, 8, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "deprecated", 3, &google_protobuf_msgs[7], NULL, 2, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, "double_value", 6, &google_protobuf_msgs[18], NULL, 13, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, "end", 2, &google_protobuf_msgs[1], NULL, 1, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "enum_type", 4, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[2]), 15, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "enum_type", 5, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[2]), 18, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "experimental_map_key", 9, &google_protobuf_msgs[7], NULL, 3, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "extendee", 2, &google_protobuf_msgs[6], NULL, 3, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "extension", 7, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[6]), 34, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "extension", 6, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[6]), 25, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "extension_range", 5, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[1]), 20, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "field", 2, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[6]), 5, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "file", 1, &google_protobuf_msgs[9], upb_upcast(&google_protobuf_msgs[8]), 2, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "identifier_value", 3, &google_protobuf_msgs[18], NULL, 5, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "input_type", 2, &google_protobuf_msgs[12], NULL, 3, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, "is_extension", 2, &google_protobuf_msgs[19], NULL, 3, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "java_generate_equals_and_hash", 20, &google_protobuf_msgs[10], NULL, 6, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "java_generic_services", 17, &google_protobuf_msgs[10], NULL, 4, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "java_multiple_files", 10, &google_protobuf_msgs[10], NULL, 16, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "java_outer_classname", 8, &google_protobuf_msgs[10], NULL, 12, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "java_package", 1, &google_protobuf_msgs[10], NULL, 0, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, "label", 4, &google_protobuf_msgs[6], upb_upcast(&google_protobuf_enums[0]), 7, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "location", 1, &google_protobuf_msgs[16], upb_upcast(&google_protobuf_msgs[17]), 2, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "message_set_wire_format", 1, &google_protobuf_msgs[11], NULL, 0, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "message_type", 4, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[0]), 13, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "method", 2, &google_protobuf_msgs[14], upb_upcast(&google_protobuf_msgs[12]), 5, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[12], NULL, 0, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[4], NULL, 0, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[14], NULL, 0, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[2], NULL, 0, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[6], NULL, 0, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "name", 2, &google_protobuf_msgs[18], upb_upcast(&google_protobuf_msgs[19]), 2, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[0], NULL, 0, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[8], NULL, 0, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, "name_part", 1, &google_protobuf_msgs[19], NULL, 0, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, "negative_int_value", 5, &google_protobuf_msgs[18], NULL, 9, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "nested_type", 3, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[0]), 10, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "no_standard_descriptor_accessor", 2, &google_protobuf_msgs[11], NULL, 1, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, "number", 2, &google_protobuf_msgs[4], NULL, 3, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, "number", 3, &google_protobuf_msgs[6], NULL, 6, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, "optimize_for", 9, &google_protobuf_msgs[10], upb_upcast(&google_protobuf_enums[3]), 15, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 4, &google_protobuf_msgs[12], upb_upcast(&google_protobuf_msgs[13]), 9, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 3, &google_protobuf_msgs[14], upb_upcast(&google_protobuf_msgs[15]), 8, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 8, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[10]), 21, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 3, &google_protobuf_msgs[2], upb_upcast(&google_protobuf_msgs[3]), 8, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 7, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[11]), 28, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 8, &google_protobuf_msgs[6], upb_upcast(&google_protobuf_msgs[7]), 9, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 3, &google_protobuf_msgs[4], upb_upcast(&google_protobuf_msgs[5]), 4, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "output_type", 3, &google_protobuf_msgs[12], NULL, 6, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "package", 2, &google_protobuf_msgs[8], NULL, 3, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "packed", 2, &google_protobuf_msgs[7], NULL, 1, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, "path", 1, &google_protobuf_msgs[17], NULL, 2, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, "positive_int_value", 4, &google_protobuf_msgs[18], NULL, 8, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "py_generic_services", 18, &google_protobuf_msgs[10], NULL, 5, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "service", 6, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[14]), 29, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "source_code_info", 9, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[16]), 24, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, "span", 2, &google_protobuf_msgs[17], NULL, 5, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, "start", 1, &google_protobuf_msgs[1], NULL, 0, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, "string_value", 7, &google_protobuf_msgs[18], NULL, 14, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, "type", 5, &google_protobuf_msgs[6], upb_upcast(&google_protobuf_enums[1]), 8, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "type_name", 6, &google_protobuf_msgs[6], NULL, 12, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[15], upb_upcast(&google_protobuf_msgs[18]), 2, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[11], upb_upcast(&google_protobuf_msgs[18]), 4, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[13], upb_upcast(&google_protobuf_msgs[18]), 2, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[10], upb_upcast(&google_protobuf_msgs[18]), 9, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[7], upb_upcast(&google_protobuf_msgs[18]), 8, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[3], upb_upcast(&google_protobuf_msgs[18]), 2, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[5], upb_upcast(&google_protobuf_msgs[18]), 2, UPB_VALUE_INIT_NONE),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "value", 2, &google_protobuf_msgs[2], upb_upcast(&google_protobuf_msgs[4]), 5, UPB_VALUE_INIT_NONE),
+};
+
+const upb_enumdef google_protobuf_enums[4] = {
+  UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Label", UPB_STRTABLE_INIT(3, 3, 1, 2, &google_protobuf_strentries[148]), UPB_INTTABLE_INIT(0, 0, 8, 0, NULL, &google_protobuf_arrays[78], 4, 3), 0),
+  UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Type", UPB_STRTABLE_INIT(18, 31, 1, 5, &google_protobuf_strentries[152]), UPB_INTTABLE_INIT(12, 15, 8, 4, &google_protobuf_intentries[50], &google_protobuf_arrays[82], 7, 6), 0),
+  UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.CType", UPB_STRTABLE_INIT(3, 3, 1, 2, &google_protobuf_strentries[184]), UPB_INTTABLE_INIT(0, 0, 8, 0, NULL, &google_protobuf_arrays[89], 4, 3), 0),
+  UPB_ENUMDEF_INIT("google.protobuf.FileOptions.OptimizeMode", UPB_STRTABLE_INIT(3, 3, 1, 2, &google_protobuf_strentries[188]), UPB_INTTABLE_INIT(0, 0, 8, 0, NULL, &google_protobuf_arrays[93], 4, 3), 0),
+};
+
+const upb_tabent google_protobuf_strentries[192] = {
+  {UPB_TABKEY_STR("extension"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[13]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[36]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("field"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[15]), NULL},
+  {UPB_TABKEY_STR("extension_range"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[14]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("nested_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[40]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[49]), NULL},
+  {UPB_TABKEY_STR("enum_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[8]), &google_protobuf_strentries[14]},
+  {UPB_TABKEY_STR("start"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[61]), NULL},
+  {UPB_TABKEY_STR("end"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[7]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[72]), NULL},
+  {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[48]), NULL},
+  {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[33]), &google_protobuf_strentries[22]},
+  {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[70]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("number"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[42]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[51]), NULL},
+  {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[31]), &google_protobuf_strentries[30]},
+  {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[71]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("label"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[25]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[34]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("number"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[43]), &google_protobuf_strentries[49]},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("type_name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[64]), NULL},
+  {UPB_TABKEY_STR("extendee"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[11]), NULL},
+  {UPB_TABKEY_STR("type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[63]), &google_protobuf_strentries[48]},
+  {UPB_TABKEY_STR("default_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[3]), NULL},
+  {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[50]), NULL},
+  {UPB_TABKEY_STR("experimental_map_key"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[10]), &google_protobuf_strentries[58]},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("ctype"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[2]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("deprecated"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[5]), NULL},
+  {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[69]), NULL},
+  {UPB_TABKEY_STR("packed"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[54]), NULL},
+  {UPB_TABKEY_STR("extension"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[12]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[37]), NULL},
+  {UPB_TABKEY_STR("service"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[58]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("source_code_info"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[59]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("dependency"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[4]), NULL},
+  {UPB_TABKEY_STR("message_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[28]), NULL},
+  {UPB_TABKEY_STR("package"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[53]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[47]), NULL},
+  {UPB_TABKEY_STR("enum_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[9]), &google_protobuf_strentries[74]},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("file"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[16]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[68]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("cc_generic_services"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[1]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("java_multiple_files"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[22]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("java_generic_services"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[21]), &google_protobuf_strentries[94]},
+  {UPB_TABKEY_STR("java_generate_equals_and_hash"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[20]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("java_package"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[24]), NULL},
+  {UPB_TABKEY_STR("optimize_for"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[44]), NULL},
+  {UPB_TABKEY_STR("py_generic_services"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[57]), NULL},
+  {UPB_TABKEY_STR("java_outer_classname"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[23]), NULL},
+  {UPB_TABKEY_STR("message_set_wire_format"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[27]), &google_protobuf_strentries[98]},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[66]), NULL},
+  {UPB_TABKEY_STR("no_standard_descriptor_accessor"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[41]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[30]), NULL},
+  {UPB_TABKEY_STR("input_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[18]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("output_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[52]), NULL},
+  {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[45]), NULL},
+  {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[67]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[46]), &google_protobuf_strentries[114]},
+  {UPB_TABKEY_STR("method"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[29]), NULL},
+  {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[32]), &google_protobuf_strentries[113]},
+  {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[65]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("location"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[26]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("span"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[60]), NULL},
+  {UPB_TABKEY_STR("path"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[55]), &google_protobuf_strentries[126]},
+  {UPB_TABKEY_STR("double_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[6]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[35]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("negative_int_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[39]), NULL},
+  {UPB_TABKEY_STR("aggregate_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[0]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("positive_int_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[56]), NULL},
+  {UPB_TABKEY_STR("identifier_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[17]), NULL},
+  {UPB_TABKEY_STR("string_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[62]), &google_protobuf_strentries[142]},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("is_extension"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[19]), NULL},
+  {UPB_TABKEY_STR("name_part"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[38]), NULL},
+  {UPB_TABKEY_STR("LABEL_REQUIRED"), UPB_VALUE_INIT_INT32(2), &google_protobuf_strentries[150]},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("LABEL_REPEATED"), UPB_VALUE_INIT_INT32(3), NULL},
+  {UPB_TABKEY_STR("LABEL_OPTIONAL"), UPB_VALUE_INIT_INT32(1), NULL},
+  {UPB_TABKEY_STR("TYPE_FIXED64"), UPB_VALUE_INIT_INT32(6), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("TYPE_STRING"), UPB_VALUE_INIT_INT32(9), NULL},
+  {UPB_TABKEY_STR("TYPE_FLOAT"), UPB_VALUE_INIT_INT32(2), &google_protobuf_strentries[181]},
+  {UPB_TABKEY_STR("TYPE_DOUBLE"), UPB_VALUE_INIT_INT32(1), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("TYPE_INT32"), UPB_VALUE_INIT_INT32(5), NULL},
+  {UPB_TABKEY_STR("TYPE_SFIXED32"), UPB_VALUE_INIT_INT32(15), NULL},
+  {UPB_TABKEY_STR("TYPE_FIXED32"), UPB_VALUE_INIT_INT32(7), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("TYPE_MESSAGE"), UPB_VALUE_INIT_INT32(11), &google_protobuf_strentries[182]},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("TYPE_INT64"), UPB_VALUE_INIT_INT32(3), &google_protobuf_strentries[179]},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("TYPE_ENUM"), UPB_VALUE_INIT_INT32(14), NULL},
+  {UPB_TABKEY_STR("TYPE_UINT32"), UPB_VALUE_INIT_INT32(13), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("TYPE_UINT64"), UPB_VALUE_INIT_INT32(4), &google_protobuf_strentries[178]},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("TYPE_SFIXED64"), UPB_VALUE_INIT_INT32(16), NULL},
+  {UPB_TABKEY_STR("TYPE_BYTES"), UPB_VALUE_INIT_INT32(12), NULL},
+  {UPB_TABKEY_STR("TYPE_SINT64"), UPB_VALUE_INIT_INT32(18), NULL},
+  {UPB_TABKEY_STR("TYPE_BOOL"), UPB_VALUE_INIT_INT32(8), NULL},
+  {UPB_TABKEY_STR("TYPE_GROUP"), UPB_VALUE_INIT_INT32(10), NULL},
+  {UPB_TABKEY_STR("TYPE_SINT32"), UPB_VALUE_INIT_INT32(17), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("CORD"), UPB_VALUE_INIT_INT32(1), NULL},
+  {UPB_TABKEY_STR("STRING"), UPB_VALUE_INIT_INT32(0), &google_protobuf_strentries[185]},
+  {UPB_TABKEY_STR("STRING_PIECE"), UPB_VALUE_INIT_INT32(2), NULL},
+  {UPB_TABKEY_STR("CODE_SIZE"), UPB_VALUE_INIT_INT32(2), NULL},
+  {UPB_TABKEY_STR("SPEED"), UPB_VALUE_INIT_INT32(1), &google_protobuf_strentries[191]},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_STR("LITE_RUNTIME"), UPB_VALUE_INIT_INT32(3), NULL},
+};
+
+const upb_tabent google_protobuf_intentries[66] = {
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NUM(6), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[13]), NULL},
+  {UPB_TABKEY_NUM(7), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[49]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[70]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[71]), NULL},
+  {UPB_TABKEY_NUM(8), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[50]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NUM(6), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[64]), NULL},
+  {UPB_TABKEY_NUM(7), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[3]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NUM(9), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[10]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[69]), NULL},
+  {UPB_TABKEY_NUM(8), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[47]), NULL},
+  {UPB_TABKEY_NUM(9), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[59]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NUM(6), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[58]), NULL},
+  {UPB_TABKEY_NUM(7), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[12]), NULL},
+  {UPB_TABKEY_NUM(16), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[1]), NULL},
+  {UPB_TABKEY_NUM(17), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[21]), NULL},
+  {UPB_TABKEY_NUM(18), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[57]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NUM(20), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[20]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[68]), NULL},
+  {UPB_TABKEY_NUM(8), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[23]), NULL},
+  {UPB_TABKEY_NUM(9), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[44]), NULL},
+  {UPB_TABKEY_NUM(10), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[22]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[66]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[67]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[65]), NULL},
+  {UPB_TABKEY_NUM(8), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[0]), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NUM(6), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[6]), NULL},
+  {UPB_TABKEY_NUM(7), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[62]), NULL},
+  {UPB_TABKEY_NUM(16), UPB_VALUE_INIT_CONSTPTR("TYPE_SFIXED64"), NULL},
+  {UPB_TABKEY_NUM(17), UPB_VALUE_INIT_CONSTPTR("TYPE_SINT32"), NULL},
+  {UPB_TABKEY_NUM(18), UPB_VALUE_INIT_CONSTPTR("TYPE_SINT64"), NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL},
+  {UPB_TABKEY_NUM(7), UPB_VALUE_INIT_CONSTPTR("TYPE_FIXED32"), NULL},
+  {UPB_TABKEY_NUM(8), UPB_VALUE_INIT_CONSTPTR("TYPE_BOOL"), NULL},
+  {UPB_TABKEY_NUM(9), UPB_VALUE_INIT_CONSTPTR("TYPE_STRING"), NULL},
+  {UPB_TABKEY_NUM(10), UPB_VALUE_INIT_CONSTPTR("TYPE_GROUP"), NULL},
+  {UPB_TABKEY_NUM(11), UPB_VALUE_INIT_CONSTPTR("TYPE_MESSAGE"), NULL},
+  {UPB_TABKEY_NUM(12), UPB_VALUE_INIT_CONSTPTR("TYPE_BYTES"), NULL},
+  {UPB_TABKEY_NUM(13), UPB_VALUE_INIT_CONSTPTR("TYPE_UINT32"), NULL},
+  {UPB_TABKEY_NUM(14), UPB_VALUE_INIT_CONSTPTR("TYPE_ENUM"), NULL},
+  {UPB_TABKEY_NUM(15), UPB_VALUE_INIT_CONSTPTR("TYPE_SFIXED32"), NULL},
+};
+
+const upb_value google_protobuf_arrays[97] = {
+  UPB_ARRAY_EMPTYENT,
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[36]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[15]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[40]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[8]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[14]),
+  UPB_ARRAY_EMPTYENT,
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[61]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[7]),
+  UPB_ARRAY_EMPTYENT,
+  UPB_ARRAY_EMPTYENT,
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[33]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[72]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[48]),
+  UPB_ARRAY_EMPTYENT,
+  UPB_ARRAY_EMPTYENT,
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[31]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[42]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[51]),
+  UPB_ARRAY_EMPTYENT,
+  UPB_ARRAY_EMPTYENT,
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[34]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[11]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[43]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[25]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[63]),
+  UPB_ARRAY_EMPTYENT,
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[2]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[54]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[5]),
+  UPB_ARRAY_EMPTYENT,
+  UPB_ARRAY_EMPTYENT,
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[37]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[53]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[4]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[28]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[9]),
+  UPB_ARRAY_EMPTYENT,
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[16]),
+  UPB_ARRAY_EMPTYENT,
+  UPB_ARRAY_EMPTYENT,
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[24]),
+  UPB_ARRAY_EMPTYENT,
+  UPB_ARRAY_EMPTYENT,
+  UPB_ARRAY_EMPTYENT,
+  UPB_ARRAY_EMPTYENT,
+  UPB_ARRAY_EMPTYENT,
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[27]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[41]),
+  UPB_ARRAY_EMPTYENT,
+  UPB_ARRAY_EMPTYENT,
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[30]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[18]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[52]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[45]),
+  UPB_ARRAY_EMPTYENT,
+  UPB_ARRAY_EMPTYENT,
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[32]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[29]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[46]),
+  UPB_ARRAY_EMPTYENT,
+  UPB_ARRAY_EMPTYENT,
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[26]),
+  UPB_ARRAY_EMPTYENT,
+  UPB_ARRAY_EMPTYENT,
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[55]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[60]),
+  UPB_ARRAY_EMPTYENT,
+  UPB_ARRAY_EMPTYENT,
+  UPB_ARRAY_EMPTYENT,
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[35]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[17]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[56]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[39]),
+  UPB_ARRAY_EMPTYENT,
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[38]),
+  UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[19]),
+  UPB_ARRAY_EMPTYENT,
+  UPB_ARRAY_EMPTYENT,
+  UPB_VALUE_INIT_CONSTPTR("LABEL_OPTIONAL"),
+  UPB_VALUE_INIT_CONSTPTR("LABEL_REQUIRED"),
+  UPB_VALUE_INIT_CONSTPTR("LABEL_REPEATED"),
+  UPB_ARRAY_EMPTYENT,
+  UPB_VALUE_INIT_CONSTPTR("TYPE_DOUBLE"),
+  UPB_VALUE_INIT_CONSTPTR("TYPE_FLOAT"),
+  UPB_VALUE_INIT_CONSTPTR("TYPE_INT64"),
+  UPB_VALUE_INIT_CONSTPTR("TYPE_UINT64"),
+  UPB_VALUE_INIT_CONSTPTR("TYPE_INT32"),
+  UPB_VALUE_INIT_CONSTPTR("TYPE_FIXED64"),
+  UPB_VALUE_INIT_CONSTPTR("STRING"),
+  UPB_VALUE_INIT_CONSTPTR("CORD"),
+  UPB_VALUE_INIT_CONSTPTR("STRING_PIECE"),
+  UPB_ARRAY_EMPTYENT,
+  UPB_ARRAY_EMPTYENT,
+  UPB_VALUE_INIT_CONSTPTR("SPEED"),
+  UPB_VALUE_INIT_CONSTPTR("CODE_SIZE"),
+  UPB_VALUE_INIT_CONSTPTR("LITE_RUNTIME"),
+};
+
--- a/upb/descriptor/descriptor.upb.h
+++ b/upb/descriptor/descriptor.upb.h
@ -0,0 +1,90 @@
+// This file was generated by upbc (the upb compiler).
+// Do not edit -- your changes will be discarded when the file is
+// regenerated.
+
+#ifndef GOOGLE_PROTOBUF_UPB_H_
+#define GOOGLE_PROTOBUF_UPB_H_
+
+#include "upb/def.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Enums
+
+typedef enum {
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED64 = 6,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_STRING = 9,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FLOAT = 2,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_DOUBLE = 1,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT32 = 5,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED32 = 15,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED32 = 7,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_MESSAGE = 11,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT64 = 3,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_ENUM = 14,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT64 = 4,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED64 = 16,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BYTES = 12,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT64 = 18,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BOOL = 8,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_GROUP = 10,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT32 = 17,
+} google_protobuf_FieldDescriptorProto_Type;
+
+typedef enum {
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REQUIRED = 2,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REPEATED = 3,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_OPTIONAL = 1,
+} google_protobuf_FieldDescriptorProto_Label;
+
+typedef enum {
+  GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_CORD = 1,
+  GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING = 0,
+  GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING_PIECE = 2,
+} google_protobuf_FieldOptions_CType;
+
+typedef enum {
+  GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_CODE_SIZE = 2,
+  GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_SPEED = 1,
+  GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_LITE_RUNTIME = 3,
+} google_protobuf_FileOptions_OptimizeMode;
+
+// Do not refer to these forward declarations; use the constants
+// below.
+extern const upb_msgdef google_protobuf_msgs[20];
+extern const upb_fielddef google_protobuf_fields[73];
+extern const upb_enumdef google_protobuf_enums[4];
+
+// Constants for references to defs.
+// We hide these behind macros to decouple users from the
+// details of how we have statically defined them (i.e. whether
+// each def has its own symbol or lives in an array of defs).
+// Each expansion is parenthesized so that the macro behaves as a single
+// pointer expression even when a postfix operator (->, []) is applied to it.
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO (&google_protobuf_msgs[0])
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE (&google_protobuf_msgs[1])
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO (&google_protobuf_msgs[2])
+#define GOOGLE_PROTOBUF_ENUMOPTIONS (&google_protobuf_msgs[3])
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO (&google_protobuf_msgs[4])
+#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS (&google_protobuf_msgs[5])
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO (&google_protobuf_msgs[6])
+#define GOOGLE_PROTOBUF_FIELDOPTIONS (&google_protobuf_msgs[7])
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO (&google_protobuf_msgs[8])
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET (&google_protobuf_msgs[9])
+#define GOOGLE_PROTOBUF_FILEOPTIONS (&google_protobuf_msgs[10])
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS (&google_protobuf_msgs[11])
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO (&google_protobuf_msgs[12])
+#define GOOGLE_PROTOBUF_METHODOPTIONS (&google_protobuf_msgs[13])
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO (&google_protobuf_msgs[14])
+#define GOOGLE_PROTOBUF_SERVICEOPTIONS (&google_protobuf_msgs[15])
+#define GOOGLE_PROTOBUF_SOURCECODEINFO (&google_protobuf_msgs[16])
+#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION (&google_protobuf_msgs[17])
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION (&google_protobuf_msgs[18])
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART (&google_protobuf_msgs[19])
+
+#ifdef __cplusplus
+}  // extern "C" (no trailing ';': a linkage block is not a declaration that takes one)
+#endif
+
+#endif  // GOOGLE_PROTOBUF_UPB_H_
--- a/upb/descriptor/descriptor_const.h
+++ b/upb/descriptor/descriptor_const.h
@ -1,349 +0,0 @@
-/* This file was generated by upbc (the upb compiler).  Do not edit. */
-
-#ifndef UPB_DESCRIPTOR_CONST_H
-#define UPB_DESCRIPTOR_CONST_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Enums. */
-
-typedef enum google_protobuf_FieldDescriptorProto_Type {
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED64 = 6,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_STRING = 9,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FLOAT = 2,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_DOUBLE = 1,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT32 = 5,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED32 = 15,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED32 = 7,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_MESSAGE = 11,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT64 = 3,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_ENUM = 14,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT64 = 4,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED64 = 16,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BYTES = 12,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT64 = 18,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BOOL = 8,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_GROUP = 10,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT32 = 17
-} google_protobuf_FieldDescriptorProto_Type;
-
-typedef enum google_protobuf_FieldDescriptorProto_Label {
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REQUIRED = 2,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REPEATED = 3,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_OPTIONAL = 1
-} google_protobuf_FieldDescriptorProto_Label;
-
-typedef enum google_protobuf_FieldOptions_CType {
-  GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_CORD = 1,
-  GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING = 0,
-  GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING_PIECE = 2
-} google_protobuf_FieldOptions_CType;
-
-typedef enum google_protobuf_FileOptions_OptimizeMode {
-  GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_CODE_SIZE = 2,
-  GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_SPEED = 1,
-  GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_LITE_RUNTIME = 3
-} google_protobuf_FileOptions_OptimizeMode;
-
-/* Constants for field names and numbers. */
-
-#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDNUM 1
-#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDNAME "path"
-#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDTYPE 5
-
-#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_SPAN__FIELDNUM 2
-#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_SPAN__FIELDNAME "span"
-#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_SPAN__FIELDTYPE 5
-
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME__FIELDNUM 2
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME__FIELDNAME "name"
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE__FIELDNUM 3
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE__FIELDNAME "identifier_value"
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_POSITIVE_INT_VALUE__FIELDNUM 4
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_POSITIVE_INT_VALUE__FIELDNAME "positive_int_value"
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_POSITIVE_INT_VALUE__FIELDTYPE 4
-
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDNUM 5
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDNAME "negative_int_value"
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDTYPE 3
-
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNUM 8
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNAME "aggregate_value"
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDNUM 6
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDNAME "double_value"
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDTYPE 1
-
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDNUM 7
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDNAME "string_value"
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDTYPE 12
-
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDNUM 1
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDNAME "name"
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDNUM 2
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDNAME "package"
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY__FIELDNUM 3
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY__FIELDNAME "dependency"
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDNUM 4
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDNAME "message_type"
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDNUM 5
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type"
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDNUM 8
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDNUM 9
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDNAME "source_code_info"
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNUM 6
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNAME "service"
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNUM 7
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNAME "extension"
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDNUM 1
-#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDNAME "name"
-#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE__FIELDNUM 2
-#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE__FIELDNAME "input_type"
-#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE__FIELDNUM 3
-#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE__FIELDNAME "output_type"
-#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDNUM 4
-#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
-#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
-#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNUM 1
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNAME "file"
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDNUM 1
-#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDNAME "location"
-#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_START__FIELDNUM 1
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_START__FIELDNAME "start"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_START__FIELDTYPE 5
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDNUM 2
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDNAME "end"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDTYPE 5
-
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNUM 1
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNAME "name"
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNUM 2
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNAME "number"
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE 5
-
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDNUM 1
-#define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDNAME "ctype"
-#define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDTYPE 14
-
-#define GOOGLE_PROTOBUF_FIELDOPTIONS_PACKED__FIELDNUM 2
-#define GOOGLE_PROTOBUF_FIELDOPTIONS_PACKED__FIELDNAME "packed"
-#define GOOGLE_PROTOBUF_FIELDOPTIONS_PACKED__FIELDTYPE 8
-
-#define GOOGLE_PROTOBUF_FIELDOPTIONS_DEPRECATED__FIELDNUM 3
-#define GOOGLE_PROTOBUF_FIELDOPTIONS_DEPRECATED__FIELDNAME "deprecated"
-#define GOOGLE_PROTOBUF_FIELDOPTIONS_DEPRECATED__FIELDTYPE 8
-
-#define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY__FIELDNUM 9
-#define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY__FIELDNAME "experimental_map_key"
-#define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
-#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
-#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDNUM 1
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDNAME "java_package"
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDNUM 16
-#define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDNAME "cc_generic_services"
-#define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDTYPE 8
-
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERIC_SERVICES__FIELDNUM 17
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERIC_SERVICES__FIELDNAME "java_generic_services"
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERIC_SERVICES__FIELDTYPE 8
-
-#define GOOGLE_PROTOBUF_FILEOPTIONS_PY_GENERIC_SERVICES__FIELDNUM 18
-#define GOOGLE_PROTOBUF_FILEOPTIONS_PY_GENERIC_SERVICES__FIELDNAME "py_generic_services"
-#define GOOGLE_PROTOBUF_FILEOPTIONS_PY_GENERIC_SERVICES__FIELDTYPE 8
-
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERATE_EQUALS_AND_HASH__FIELDNUM 20
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERATE_EQUALS_AND_HASH__FIELDNAME "java_generate_equals_and_hash"
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERATE_EQUALS_AND_HASH__FIELDTYPE 8
-
-#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
-#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
-#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNUM 8
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNAME "java_outer_classname"
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNUM 9
-#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNAME "optimize_for"
-#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDTYPE 14
-
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNUM 10
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNAME "java_multiple_files"
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDTYPE 8
-
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNUM 1
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNAME "name"
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNUM 2
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNAME "value"
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNUM 1
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNAME "name"
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNUM 2
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNAME "method"
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNUM 1
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNAME "name"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNUM 2
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNAME "field"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNUM 3
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNAME "nested_type"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNUM 4
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNUM 5
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNAME "extension_range"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNUM 6
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNAME "extension"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNUM 7
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
-#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
-#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDNUM 1
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDNAME "name"
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE__FIELDNUM 2
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE__FIELDNAME "extendee"
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDNUM 3
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDNAME "number"
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDTYPE 5
-
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDNUM 4
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDNAME "label"
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDTYPE 14
-
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDNUM 5
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDNAME "type"
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDTYPE 14
-
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNUM 8
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNUM 6
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNAME "type_name"
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDNUM 7
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDNAME "default_value"
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
-#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
-#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNUM 1
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNAME "message_set_wire_format"
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDTYPE 8
-
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNUM 2
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNAME "no_standard_descriptor_accessor"
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDTYPE 8
-
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
-#define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
-#define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNUM 1
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNAME "name_part"
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNUM 2
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNAME "is_extension"
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDTYPE 8
-
-#ifdef __cplusplus
-}  /* extern "C" */
-#endif
-
-#endif  /* UPB_DESCRIPTOR_CONST_H */
--- a/upb/descriptor/reader.c
+++ b/upb/descriptor/reader.c
@ -3,21 +3,38 @@
 *
 * Copyright (c) 2008-2009 Google Inc.  See LICENSE for details.
 * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * XXX: The routines in this file that consume a string do not currently
+ * support strings that span multiple buffers.  In the future, as upb_sink
+ * and its buffering/sharing functionality evolve, there should be an easy
+ * and idiomatic way of handling this case correctly.  For now, we accept
+ * this limitation since we currently only parse descriptors from single
+ * strings.
 */

-#include <stdlib.h>
+#include "upb/descriptor/reader.h"
+
 #include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include "upb/bytestream.h"
 #include "upb/def.h"
-#include "upb/descriptor/descriptor_const.h"
-#include "upb/descriptor/reader.h"
+#include "upb/descriptor/descriptor.upb.h"
+
+// Returns a malloc()'d, NUL-terminated copy of the first n bytes of buf, or
+// NULL if the allocation fails.  buf itself need not be NUL-terminated.
+static char *upb_strndup(const char *buf, size_t n) {
+  char *copy = malloc(n + 1);  // One extra byte for the terminator.
+  if (copy != NULL) {
+    memcpy(copy, buf, n);
+    copy[n] = '\0';
+  }
+  return copy;
+}

 // Returns a newly allocated string that joins input strings together, for example:
 //   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
 //   join("", "Baz") -> "Baz"
-// Caller owns a ref on the returned string. */
+// Caller owns a ref on the returned string.
 static char *upb_join(const char *base, const char *name) {
  if (!base || strlen(base) == 0) {
-    return strdup(name);
+    return upb_strdup(name);
  } else {
    char *ret = malloc(strlen(base) + strlen(name) + 2);
    ret[0] = '\0';
@ -74,10 +91,6 @@ static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
  }
 }

-// Forward declares for top-level file descriptors.
-static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h);
-static upb_mhandlers * upb_enumdef_register_EnumDescriptorProto(upb_handlers *h);
-
 void upb_descreader_init(upb_descreader *r) {
  upb_deflist_init(&r->defs);
  upb_status_init(&r->status);
@ -107,7 +120,7 @@ static upb_msgdef *upb_descreader_top(upb_descreader *r) {
  if (r->stack_len <= 1) return NULL;
  int index = r->stack[r->stack_len-1].start - 1;
  assert(index >= 0);
-  return upb_downcast_msgdef(r->defs.defs[index]);
+  return upb_downcast_msgdef_mutable(r->defs.defs[index]);
 }

 static upb_def *upb_descreader_last(upb_descreader *r) {
@ -136,144 +149,80 @@ void upb_descreader_setscopename(upb_descreader *r, char *str) {
 }

 // Handlers for google.protobuf.FileDescriptorProto.
-static upb_flow_t upb_descreader_FileDescriptorProto_startmsg(void *_r) {
+static bool file_startmsg(void *_r) {
  upb_descreader *r = _r;
  upb_descreader_startcontainer(r);
-  return UPB_CONTINUE;
+  return true;
 }

-static void upb_descreader_FileDescriptorProto_endmsg(void *_r,
-                                                      upb_status *status) {
-  (void)status;
+static void file_endmsg(void *_r, upb_status *status) {
+  UPB_UNUSED(status);
  upb_descreader *r = _r;
  upb_descreader_endcontainer(r);
 }

-static upb_flow_t upb_descreader_FileDescriptorProto_package(void *_r,
-                                                             upb_value fval,
-                                                             upb_value val) {
-  (void)fval;
+static size_t file_onpackage(void *_r, void *fval, const char *buf, size_t n) {
+  UPB_UNUSED(fval);
  upb_descreader *r = _r;
-  upb_descreader_setscopename(
-      r, upb_byteregion_strdup(upb_value_getbyteregion(val)));
-  return UPB_CONTINUE;
-}
-
-static upb_mhandlers *upb_descreader_register_FileDescriptorProto(
-    upb_handlers *h) {
-  upb_mhandlers *m = upb_handlers_newmhandlers(h);
-  upb_mhandlers_setstartmsg(m, &upb_descreader_FileDescriptorProto_startmsg);
-  upb_mhandlers_setendmsg(m, &upb_descreader_FileDescriptorProto_endmsg);
-
-#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDNUM
-#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDTYPE
-  upb_fhandlers *f =
-      upb_mhandlers_newfhandlers(m, FNUM(PACKAGE), FTYPE(PACKAGE), false);
-  upb_fhandlers_setvalue(f, &upb_descreader_FileDescriptorProto_package);
-
-  upb_mhandlers_newfhandlers_subm(m, FNUM(MESSAGE_TYPE), FTYPE(MESSAGE_TYPE), true,
-                                  upb_msgdef_register_DescriptorProto(h));
-  upb_mhandlers_newfhandlers_subm(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true,
-                                  upb_enumdef_register_EnumDescriptorProto(h));
-  // TODO: services, extensions
-  return m;
-}
-#undef FNUM
-#undef FTYPE
-
-static upb_mhandlers *upb_descreader_register_FileDescriptorSet(upb_handlers *h) {
-  upb_mhandlers *m = upb_handlers_newmhandlers(h);
-
-#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDNUM
-#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDTYPE
-  upb_mhandlers_newfhandlers_subm(m, FNUM(FILE), FTYPE(FILE), true,
-                                   upb_descreader_register_FileDescriptorProto(h));
-  return m;
-}
-#undef FNUM
-#undef FTYPE
-
-upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h) {
-  h->should_jit = false;
-  return upb_descreader_register_FileDescriptorSet(h);
+  // XXX: see comment at the top of the file.
+  upb_descreader_setscopename(r, upb_strndup(buf, n));
+  return n;
 }

-// google.protobuf.EnumValueDescriptorProto.
-static upb_flow_t upb_enumdef_EnumValueDescriptorProto_startmsg(void *_r) {
+// Handlers for google.protobuf.EnumValueDescriptorProto.
+static bool enumval_startmsg(void *_r) {
  upb_descreader *r = _r;
  r->saw_number = false;
  r->saw_name = false;
-  return UPB_CONTINUE;
+  return true;
 }

-static upb_flow_t upb_enumdef_EnumValueDescriptorProto_name(void *_r,
-                                                            upb_value fval,
-                                                            upb_value val) {
-  (void)fval;
+static size_t enumval_onname(void *_r, void *fval, const char *buf, size_t n) {
+  UPB_UNUSED(fval);
  upb_descreader *r = _r;
+  // XXX: see comment at the top of the file.
  free(r->name);
-  r->name = upb_byteregion_strdup(upb_value_getbyteregion(val));
+  r->name = upb_strndup(buf, n);
  r->saw_name = true;
-  return UPB_CONTINUE;
+  return n;
 }

-static upb_flow_t upb_enumdef_EnumValueDescriptorProto_number(void *_r,
-                                                              upb_value fval,
-                                                              upb_value val) {
-  (void)fval;
+static bool enumval_onnumber(void *_r, void *fval, int32_t val) {
+  UPB_UNUSED(fval);
  upb_descreader *r = _r;
-  r->number = upb_value_getint32(val);
+  r->number = val;
  r->saw_number = true;
-  return UPB_CONTINUE;
+  return true;
 }

-static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_r,
-                                                        upb_status *status) {
+static void enumval_endmsg(void *_r, upb_status *status) {
  upb_descreader *r = _r;
  if(!r->saw_number || !r->saw_name) {
    upb_status_seterrliteral(status, "Enum value missing name or number.");
    return;
  }
-  upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
+  upb_enumdef *e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
  if (upb_enumdef_numvals(e) == 0) {
    // The default value of an enum (in the absence of an explicit default) is
    // its first listed value.
    upb_enumdef_setdefault(e, r->number);
  }
-  upb_enumdef_addval(e, r->name, r->number);
+  upb_enumdef_addval(e, r->name, r->number, status);
  free(r->name);
  r->name = NULL;
 }

-static upb_mhandlers *upb_enumdef_register_EnumValueDescriptorProto(
-    upb_handlers *h) {
-  upb_mhandlers *m = upb_handlers_newmhandlers(h);
-  upb_mhandlers_setstartmsg(m, &upb_enumdef_EnumValueDescriptorProto_startmsg);
-  upb_mhandlers_setendmsg(m, &upb_enumdef_EnumValueDescriptorProto_endmsg);
-
-#define FNUM(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDNUM
-#define FTYPE(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDTYPE
-  upb_fhandlers *f;
-  f = upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false);
-  upb_fhandlers_setvalue(f, &upb_enumdef_EnumValueDescriptorProto_name);
-
-  f = upb_mhandlers_newfhandlers(m, FNUM(NUMBER), FTYPE(NUMBER), false);
-  upb_fhandlers_setvalue(f, &upb_enumdef_EnumValueDescriptorProto_number);
-  return m;
-}
-#undef FNUM
-#undef FTYPE

-// google.protobuf.EnumDescriptorProto.
-static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_r) {
+// Handlers for google.protobuf.EnumDescriptorProto.
+static bool enum_startmsg(void *_r) {
  upb_descreader *r = _r;
-  upb_deflist_push(&r->defs, UPB_UPCAST(upb_enumdef_new(&r->defs)));
-  return UPB_CONTINUE;
+  upb_deflist_push(&r->defs, upb_upcast(upb_enumdef_new(&r->defs)));
+  return true;
 }

-static void upb_enumdef_EnumDescriptorProto_endmsg(void *_r, upb_status *status) {
+static void enum_endmsg(void *_r, upb_status *status) {
  upb_descreader *r = _r;
-  upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
+  upb_enumdef *e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
  if (upb_def_fullname(upb_descreader_last((upb_descreader*)_r)) == NULL) {
    upb_status_seterrliteral(status, "Enum had no name.");
    return;
@ -284,46 +233,28 @@ static void upb_enumdef_EnumDescriptorProto_endmsg(void *_r, upb_status *status)
  }
 }

-static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_r,
-                                                       upb_value fval,
-                                                       upb_value val) {
-  (void)fval;
+static size_t enum_onname(void *_r, void *fval, const char *buf, size_t n) {
+  UPB_UNUSED(fval);
  upb_descreader *r = _r;
-  char *fullname = upb_byteregion_strdup(upb_value_getbyteregion(val));
+  // XXX: see comment at the top of the file.
+  char *fullname = upb_strndup(buf, n);
  upb_def_setfullname(upb_descreader_last(r), fullname);
  free(fullname);
-  return UPB_CONTINUE;
-}
-
-static upb_mhandlers *upb_enumdef_register_EnumDescriptorProto(upb_handlers *h) {
-  upb_mhandlers *m = upb_handlers_newmhandlers(h);
-  upb_mhandlers_setstartmsg(m, &upb_enumdef_EnumDescriptorProto_startmsg);
-  upb_mhandlers_setendmsg(m, &upb_enumdef_EnumDescriptorProto_endmsg);
-
-#define FNUM(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDNUM
-#define FTYPE(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDTYPE
-  upb_fhandlers *f =
-      upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false);
-  upb_fhandlers_setvalue(f, &upb_enumdef_EnumDescriptorProto_name);
-
-  upb_mhandlers_newfhandlers_subm(m, FNUM(VALUE), FTYPE(VALUE), true,
-                               upb_enumdef_register_EnumValueDescriptorProto(h));
-  return m;
+  return n;
 }
-#undef FNUM
-#undef FTYPE

-static upb_flow_t upb_fielddef_startmsg(void *_r) {
+// Handlers for google.protobuf.FieldDescriptorProto
+static bool field_startmsg(void *_r) {
  upb_descreader *r = _r;
  r->f = upb_fielddef_new(&r->defs);
  free(r->default_string);
  r->default_string = NULL;
-  return UPB_CONTINUE;
+  return true;
 }

 // Converts the default value in string "str" into "d".  Passes a ref on str.
 // Returns true on success.
-static bool upb_fielddef_parsedefault(char *str, upb_value *d, int type) {
+static bool parse_default(char *str, upb_value *d, int type) {
  bool success = true;
  if (str) {
    switch(type) {
@ -397,29 +328,24 @@ static bool upb_fielddef_parsedefault(char *str, upb_value *d, int type) {
  return success;
 }

-static void upb_fielddef_endmsg(void *_r, upb_status *status) {
+static void field_endmsg(void *_r, upb_status *status) {
  upb_descreader *r = _r;
  upb_fielddef *f = r->f;
  // TODO: verify that all required fields were present.
-  assert(f->number != -1 && upb_fielddef_name(f) != NULL);
-  assert((upb_fielddef_subtypename(f) != NULL) == upb_hassubdef(f));
-
-  // Field was successfully read, add it as a field of the msgdef.
-  upb_msgdef *m = upb_descreader_top(r);
-  upb_msgdef_addfield(m, f, &r->defs);
-  r->f = NULL;
+  assert(upb_fielddef_number(f) != 0 && upb_fielddef_name(f) != NULL);
+  assert((upb_fielddef_subdefname(f) != NULL) == upb_fielddef_hassubdef(f));

  if (r->default_string) {
-    if (upb_issubmsg(f)) {
+    if (upb_fielddef_issubmsg(f)) {
      upb_status_seterrliteral(status, "Submessages cannot have defaults.");
      return;
    }
-    if (upb_isstring(f) || f->type == UPB_TYPE(ENUM)) {
+    if (upb_fielddef_isstring(f) || upb_fielddef_type(f) == UPB_TYPE(ENUM)) {
      upb_fielddef_setdefaultcstr(f, r->default_string);
    } else {
      upb_value val;
      upb_value_setptr(&val, NULL);  // Silence inaccurate compiler warnings.
-      if (!upb_fielddef_parsedefault(r->default_string, &val, f->type)) {
+      if (!parse_default(r->default_string, &val, upb_fielddef_type(f))) {
        // We don't worry too much about giving a great error message since the
        // compiler should have ensured this was correct.
        upb_status_seterrliteral(status, "Error converting default value.");
@ -430,132 +356,147 @@ static void upb_fielddef_endmsg(void *_r, upb_status *status) {
  }
 }

-static upb_flow_t upb_fielddef_ontype(void *_r, upb_value fval, upb_value val) {
-  (void)fval;
+static bool field_ontype(void *_r, void *fval, int32_t val) {
+  UPB_UNUSED(fval);
  upb_descreader *r = _r;
-  upb_fielddef_settype(r->f, upb_value_getint32(val));
-  return UPB_CONTINUE;
+  upb_fielddef_settype(r->f, val);
+  return true;
 }

-static upb_flow_t upb_fielddef_onlabel(void *_r, upb_value fval, upb_value val) {
-  (void)fval;
+static bool field_onlabel(void *_r, void *fval, int32_t val) {
+  UPB_UNUSED(fval);
  upb_descreader *r = _r;
-  upb_fielddef_setlabel(r->f, upb_value_getint32(val));
-  return UPB_CONTINUE;
+  upb_fielddef_setlabel(r->f, val);
+  return true;
 }

-static upb_flow_t upb_fielddef_onnumber(void *_r, upb_value fval, upb_value val) {
-  (void)fval;
+static bool field_onnumber(void *_r, void *fval, int32_t val) {
+  UPB_UNUSED(fval);
  upb_descreader *r = _r;
-  upb_fielddef_setnumber(r->f, upb_value_getint32(val));
-  return UPB_CONTINUE;
+  upb_fielddef_setnumber(r->f, val);
+  return true;
 }

-static upb_flow_t upb_fielddef_onname(void *_r, upb_value fval, upb_value val) {
-  (void)fval;
+static size_t field_onname(void *_r, void *fval, const char *buf, size_t n) {
+  UPB_UNUSED(fval);
  upb_descreader *r = _r;
-  char *name = upb_byteregion_strdup(upb_value_getbyteregion(val));
+  // XXX: see comment at the top of the file.
+  char *name = upb_strndup(buf, n);
  upb_fielddef_setname(r->f, name);
  free(name);
-  return UPB_CONTINUE;
+  return n;
 }

-static upb_flow_t upb_fielddef_ontypename(void *_r, upb_value fval,
-                                          upb_value val) {
-  (void)fval;
+static size_t field_ontypename(void *_r, void *fval, const char *buf,
+                               size_t n) {
+  UPB_UNUSED(fval);
  upb_descreader *r = _r;
-  char *name = upb_byteregion_strdup(upb_value_getbyteregion(val));
-  upb_fielddef_setsubtypename(r->f, name);
+  // XXX: see comment at the top of the file.
+  char *name = upb_strndup(buf, n);
+  upb_fielddef_setsubdefname(r->f, name);
  free(name);
-  return UPB_CONTINUE;
+  return n;
 }

-static upb_flow_t upb_fielddef_ondefaultval(void *_r, upb_value fval,
-                                            upb_value val) {
-  (void)fval;
+static size_t field_ondefaultval(void *_r, void *fval, const char *buf,
+                                 size_t n) {
+  UPB_UNUSED(fval);
  upb_descreader *r = _r;
  // Have to convert from string to the correct type, but we might not know the
-  // type yet.
+  // type yet, so we save it as a string until the end of the field.
+  // XXX: see comment at the top of the file.
  free(r->default_string);
-  r->default_string = upb_byteregion_strdup(upb_value_getbyteregion(val));
-  return UPB_CONTINUE;
-}
-
-static upb_mhandlers *upb_fielddef_register_FieldDescriptorProto(
-    upb_handlers *h) {
-  upb_mhandlers *m = upb_handlers_newmhandlers(h);
-  upb_mhandlers_setstartmsg(m, &upb_fielddef_startmsg);
-  upb_mhandlers_setendmsg(m, &upb_fielddef_endmsg);
-
-#define FIELD(name, handler) \
-  upb_fhandlers_setvalue( \
-      upb_mhandlers_newfhandlers(m, \
-          GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDNUM, \
-          GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDTYPE, \
-          false), \
-      handler);
-  FIELD(TYPE, &upb_fielddef_ontype);
-  FIELD(LABEL, &upb_fielddef_onlabel);
-  FIELD(NUMBER, &upb_fielddef_onnumber);
-  FIELD(NAME, &upb_fielddef_onname);
-  FIELD(TYPE_NAME, &upb_fielddef_ontypename);
-  FIELD(DEFAULT_VALUE, &upb_fielddef_ondefaultval);
-  return m;
-}
-#undef FNUM
-#undef FTYPE
-
-
-// google.protobuf.DescriptorProto.
-static upb_flow_t upb_msgdef_startmsg(void *_r) {
+  r->default_string = upb_strndup(buf, n);
+  return n;
+}
+
+// Handlers for google.protobuf.DescriptorProto (representing a message).
+static bool msg_startmsg(void *_r) {
  upb_descreader *r = _r;
-  upb_deflist_push(&r->defs, UPB_UPCAST(upb_msgdef_new(&r->defs)));
+  upb_deflist_push(&r->defs, upb_upcast(upb_msgdef_new(&r->defs)));
  upb_descreader_startcontainer(r);
-  return UPB_CONTINUE;
+  return true;
 }

-static void upb_msgdef_endmsg(void *_r, upb_status *status) {
+static void msg_endmsg(void *_r, upb_status *status) {
  upb_descreader *r = _r;
  upb_msgdef *m = upb_descreader_top(r);
-  if(!upb_def_fullname(UPB_UPCAST(m))) {
+  if(!upb_def_fullname(upb_upcast(m))) {
    upb_status_seterrliteral(status, "Encountered message with no name.");
    return;
  }
  upb_descreader_endcontainer(r);
 }

-static upb_flow_t upb_msgdef_onname(void *_r, upb_value fval, upb_value val) {
-  (void)fval;
+static size_t msg_onname(void *_r, void *fval, const char *buf, size_t n) {
+  UPB_UNUSED(fval);
  upb_descreader *r = _r;
  upb_msgdef *m = upb_descreader_top(r);
-  char *name = upb_byteregion_strdup(upb_value_getbyteregion(val));
-  upb_def_setfullname(UPB_UPCAST(m), name);
+  // XXX: see comment at the top of the file.
+  char *name = upb_strndup(buf, n);
+  upb_def_setfullname(upb_upcast(m), name);
  upb_descreader_setscopename(r, name);  // Passes ownership of name.
-  return UPB_CONTINUE;
+  return n;
 }

-static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h) {
-  upb_mhandlers *m = upb_handlers_newmhandlers(h);
-  upb_mhandlers_setstartmsg(m, &upb_msgdef_startmsg);
-  upb_mhandlers_setendmsg(m, &upb_msgdef_endmsg);
-
-#define FNUM(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDNUM
-#define FTYPE(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDTYPE
-  upb_fhandlers *f =
-      upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false);
-  upb_fhandlers_setvalue(f, &upb_msgdef_onname);
-
-  upb_mhandlers_newfhandlers_subm(m, FNUM(FIELD), FTYPE(FIELD), true,
-                                  upb_fielddef_register_FieldDescriptorProto(h));
-  upb_mhandlers_newfhandlers_subm(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true,
-                                  upb_enumdef_register_EnumDescriptorProto(h));
+static bool msg_onendfield(void *_r, void *fval) {
+  UPB_UNUSED(fval);
+  upb_descreader *r = _r;
+  upb_msgdef *m = upb_descreader_top(r);   // Msgdef the field is added to.
+  upb_msgdef_addfield(m, r->f, &r->defs);  // NOTE(review): result unchecked.
+  r->f = NULL;  // Done with the fielddef created in field_startmsg.
+  return true;
+}

-  // DescriptorProto is self-recursive, so we must link the definition.
-  upb_mhandlers_newfhandlers_subm(
-      m, FNUM(NESTED_TYPE), FTYPE(NESTED_TYPE), true, m);
+static bool discardfield(void *_r, void *fval) {
+  UPB_UNUSED(fval);
+  upb_descreader *r = _r;
+  // Extensions are unsupported; drop the fielddef so we don't leak it.
+  upb_fielddef_unref(r->f, &r->defs);
+  r->f = NULL;  // Nothing is added to any msgdef for this field.
+  return true;
+}
+
+static void reghandlers(void *closure, upb_handlers *h) {
+  UPB_UNUSED(closure);  // Handlers are chosen purely by h's message type.
+  const upb_msgdef *m = upb_handlers_msgdef(h);
+
+  if (m == GOOGLE_PROTOBUF_DESCRIPTORPROTO) {
+    upb_handlers_setstartmsg(h, &msg_startmsg);
+    upb_handlers_setendmsg(h, &msg_endmsg);
+    upb_handlers_setstring_n(h, "name",  &msg_onname, NULL, NULL);
+    upb_handlers_setendsubmsg_n(h,   "field", &msg_onendfield, NULL, NULL);
+    // TODO: support extensions; for now their fielddefs are discarded.
+    upb_handlers_setendsubmsg_n(h, "extension", &discardfield, NULL, NULL);
+  } else if (m == GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO) {
+    upb_handlers_setstartmsg(h, &file_startmsg);
+    upb_handlers_setendmsg(h, &file_endmsg);
+    upb_handlers_setstring_n(h, "package", &file_onpackage, NULL, NULL);
+    // TODO: support extensions; for now their fielddefs are discarded.
+    upb_handlers_setendsubmsg_n(h, "extension", &discardfield, NULL, NULL);
+  } else if (m == GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO) {
+    upb_handlers_setstartmsg(h, &enumval_startmsg);
+    upb_handlers_setendmsg(h, &enumval_endmsg);
+    upb_handlers_setstring_n(h, "name",   &enumval_onname, NULL, NULL);
+    upb_handlers_setint32_n(h,  "number", &enumval_onnumber, NULL, NULL);
+  } else if (m == GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO) {
+    upb_handlers_setstartmsg(h, &enum_startmsg);
+    upb_handlers_setendmsg(h, &enum_endmsg);
+    upb_handlers_setstring_n(h, "name", &enum_onname, NULL, NULL);
+  } else if (m == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO) {
+    upb_handlers_setstartmsg(h, &field_startmsg);
+    upb_handlers_setendmsg(h, &field_endmsg);
+    upb_handlers_setint32_n (h, "type",      &field_ontype, NULL, NULL);
+    upb_handlers_setint32_n (h, "label",     &field_onlabel, NULL, NULL);
+    upb_handlers_setint32_n (h, "number",    &field_onnumber, NULL, NULL);
+    upb_handlers_setstring_n(h, "name",      &field_onname, NULL, NULL);
+    upb_handlers_setstring_n(h, "type_name", &field_ontypename, NULL, NULL);
+    upb_handlers_setstring_n(
+        h, "default_value", &field_ondefaultval, NULL, NULL);
+  }  // Other message types get no handlers here.
+}

-  // TODO: extensions.
-  return m;
+const upb_handlers *upb_descreader_newhandlers(const void *owner) {
+  return upb_handlers_newfrozen(  // Root message type: FileDescriptorSet.
+      GOOGLE_PROTOBUF_FILEDESCRIPTORSET, owner, reghandlers, NULL);
 }
-#undef FNUM
-#undef FTYPE
--- a/upb/descriptor/reader.h
+++ b/upb/descriptor/reader.h
@ -67,7 +67,7 @@ void upb_descreader_uninit(upb_descreader *r);

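-// Registers handlers that will build the defs.  Pass the descreader as the
-// closure.
-upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h);
+// Returns newly created, frozen handlers that will build the defs ("owner" is
+// passed through to upb_handlers_newfrozen()).  Pass the descreader as the
+// closure.
+const upb_handlers *upb_descreader_newhandlers(const void *owner);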

 // Gets the array of defs that have been parsed and removes them from the
 // descreader.  Ownership of the defs is passed to the caller using the given
--- a/upb/google/README
+++ b/upb/google/README
@ -0,0 +1,16 @@
+This directory contains code to interoperate with Google's official
+Protocol Buffers release.  Since it doesn't really have a name
+besides "protobuf," calling this directory "google" seems like the
+least confusing option.
+
+We support writing into protobuf's generated classes (and hopefully
+reading too, before long).  We support both the open source protobuf
+release and the Google-internal version of the same code.  The two
+live in different namespaces, and the internal version supports some
+features that are not supported in the open-source release.  Also, the
+internal version includes the legacy "proto1" classes which we must
+support; thankfully this is mostly relegated to its own separate file.
+
+Our functionality requires the full google::protobuf::Message
+interface; we rely on reflection so we know what fields to read/write
+and where to put them, so we can't support MessageLite.
--- a/upb/google/bridge.cc
+++ b/upb/google/bridge.cc
@ -0,0 +1,260 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+//
+// IMPORTANT NOTE!  This file is compiled TWICE, once with UPB_GOOGLE3 defined
+// and once without!  This allows us to provide functionality against proto2
+// and protobuf opensource both in a single binary without the two conflicting.
+// However we must be careful not to violate the ODR.
+
+#include "upb/google/bridge.h"
+
+#include <map>
+#include <string>
+#include "upb/def.h"
+#include "upb/google/proto1.h"
+#include "upb/google/proto2.h"
+#include "upb/handlers.h"
+
+namespace upb {
+namespace proto2_bridge_google3 { class Defs; }
+namespace proto2_bridge_opensource { class Defs; }
+}  // namespace upb
+
+#ifdef UPB_GOOGLE3
+#include "net/proto2/public/descriptor.h"
+#include "net/proto2/public/message.h"
+#include "net/proto2/proto/descriptor.pb.h"
+namespace goog = ::proto2;
+namespace me = ::upb::proto2_bridge_google3;
+#else
+#include "google/protobuf/descriptor.h"
+#include "google/protobuf/message.h"
+#include "google/protobuf/descriptor.pb.h"
+namespace goog = ::google::protobuf;
+namespace me = ::upb::proto2_bridge_opensource;
+#endif
+
+class me::Defs {  // Tracks the upb defs built for a proto2 message graph.
+ public:
+  void OnMessage(Handlers* h) {
+    const upb::MessageDef* md = h->message_def();
+    const goog::Message& m = *message_map_[md];  // md must already be mapped.
+    const goog::Descriptor* d = m.GetDescriptor();
+    for (upb::MessageDef::ConstIterator i(md); !i.Done(); i.Next()) {
+      const upb::FieldDef* upb_f = i.field();
+      const goog::FieldDescriptor* proto2_f =
+          d->FindFieldByNumber(upb_f->number());  // Matched by field number.
+      if (!upb::google::TrySetWriteHandlers(proto2_f, m, upb_f, h)
+#ifdef UPB_GOOGLE3
+          && !upb::google::TrySetProto1WriteHandlers(proto2_f, m, upb_f, h)
+#endif
+         ) {
+        // Unsupported reflection class.
+        //
+        // Should we fall back to using the public Reflection interface in this
+        // case?  It's unclear whether it's supported behavior for users to
+        // create their own Reflection classes.
+        assert(false);
+      }
+    }
+  }
+
+  static void StaticOnMessage(void *closure, upb::Handlers* handlers) {
+    me::Defs* defs = static_cast<me::Defs*>(closure);  // closure is a Defs*.
+    defs->OnMessage(handlers);
+  }
+
+  void AddSymbol(const std::string& name, upb::Def* def) {
+    assert(symbol_map_.find(name) == symbol_map_.end());  // No duplicates.
+    symbol_map_[name] = def;
+  }
+
+  void AddMessage(const goog::Message* m, upb::MessageDef* md) {
+    assert(message_map_.find(md) == message_map_.end());  // Register once.
+    message_map_[md] = m;
+    AddSymbol(m->GetDescriptor()->full_name(), md->Upcast());
+  }
+
+  upb::Def* FindSymbol(const std::string& name) {
+    SymbolMap::iterator iter = symbol_map_.find(name);
+    return iter != symbol_map_.end() ? iter->second : NULL;  // NULL: unknown.
+  }
+
+  void Flatten(std::vector<upb::Def*>* defs) {
+    SymbolMap::iterator iter;
+    for (iter = symbol_map_.begin(); iter != symbol_map_.end(); ++iter) {
+      defs->push_back(iter->second);  // Appends; *defs is not cleared first.
+    }
+  }
+
+ private:
+  // Maps a new upb::MessageDef* to a corresponding proto2 Message* whose
+  // derived class is of the correct type according to the message the user
+  // gave us.
+  typedef std::map<const upb::MessageDef*, const goog::Message*> MessageMap;
+  MessageMap message_map_;
+
+  // Maps a type name to a upb Def we have constructed to represent it.
+  typedef std::map<std::string, upb::Def*> SymbolMap;
+  SymbolMap symbol_map_;
+};
+
+namespace upb {
+namespace google {
+
+// Adds a FieldDef for "f" to "md" and returns it.  For submessage fields, also
+// stores a submessage instance in *subm (*not* guaranteed to be a prototype).
+FieldDef* AddFieldDef(const goog::Message& m, const goog::FieldDescriptor* f,
+                      upb::MessageDef* md, const goog::Message** subm) {
+  // To parse weak submessages effectively, we need to represent them in the
+  // upb::Def schema even though they are not reflected in the proto2
+  // descriptors (weak fields are represented as FieldDescriptor::TYPE_BYTES).
+  const goog::Message* weak_prototype = NULL;
+#ifdef UPB_GOOGLE3
+  weak_prototype = upb::google::GetProto1WeakPrototype(m, f);
+#endif
+
+  upb::FieldDef* upb_f = upb::FieldDef::New(&upb_f);  // Ref owner: &upb_f.
+  upb_f->set_number(f->number());
+  upb_f->set_name(f->name());
+  upb_f->set_label(static_cast<upb::FieldDef::Label>(f->label()));
+  upb_f->set_type(weak_prototype ?
+      UPB_TYPE_MESSAGE : static_cast<upb::FieldDef::Type>(f->type()));
+
+  if (weak_prototype) {
+    upb_f->set_subdef_name(weak_prototype->GetDescriptor()->full_name());
+  } else if (upb_f->IsSubMessage()) {
+    upb_f->set_subdef_name(f->message_type()->full_name());
+  } else if (upb_f->type() == UPB_TYPE(ENUM)) {
+    // We set the enum default numerically.
+    upb_f->set_default_value(
+        MakeValue(static_cast<int32_t>(f->default_value_enum()->number())));
+    upb_f->set_subdef_name(f->enum_type()->full_name());
+  } else {
+    // Set field default for primitive types.  Need to switch on the upb type
+    // rather than the proto2 type, because upb_f->type() may have been changed
+    // from BYTES to MESSAGE for a weak field.
+    switch (upb_types[upb_f->type()].inmemory_type) {
+      case UPB_CTYPE_INT32:
+        upb_f->set_default_value(MakeValue(f->default_value_int32()));
+        break;
+      case UPB_CTYPE_INT64:
+        upb_f->set_default_value(
+            MakeValue(static_cast<int64_t>(f->default_value_int64())));
+        break;
+      case UPB_CTYPE_UINT32:
+        upb_f->set_default_value(MakeValue(f->default_value_uint32()));
+        break;
+      case UPB_CTYPE_UINT64:
+        upb_f->set_default_value(
+            MakeValue(static_cast<uint64_t>(f->default_value_uint64())));
+        break;
+      case UPB_CTYPE_DOUBLE:
+        upb_f->set_default_value(MakeValue(f->default_value_double()));
+        break;
+      case UPB_CTYPE_FLOAT:
+        upb_f->set_default_value(MakeValue(f->default_value_float()));
+        break;
+      case UPB_CTYPE_BOOL:
+        upb_f->set_default_value(MakeValue(f->default_value_bool()));
+        break;
+      case UPB_CTYPE_BYTEREGION:
+        upb_f->set_default_string(f->default_value_string());
+        break;
+    }  // NOTE(review): no default case; other ctypes set no default here.
+  }
+  bool ok = md->AddField(upb_f, &upb_f);  // Passes &upb_f as the ref owner.
+  UPB_ASSERT_VAR(ok, ok);
+
+  if (weak_prototype) {
+    *subm = weak_prototype;
+  } else if (f->cpp_type() == goog::FieldDescriptor::CPPTYPE_MESSAGE) {
+    *subm = upb::google::GetFieldPrototype(m, f);
+#ifdef UPB_GOOGLE3
+    if (!*subm)
+      *subm = upb::google::GetProto1FieldPrototype(m, f);
+#endif
+    assert(*subm);
+  }
+
+  return upb_f;  // *subm is left untouched for non-message fields.
+}
+
+upb::EnumDef* NewEnumDef(const goog::EnumDescriptor* desc, void *owner) {
+  upb::EnumDef* e = upb::EnumDef::New(owner);  // Created on owner's behalf.
+  e->set_full_name(desc->full_name());
+  for (int i = 0; i < desc->value_count(); i++) {  // Copy every enum value.
+    const goog::EnumValueDescriptor* val = desc->value(i);
+    bool success = e->AddValue(val->name(), val->number(), NULL);
+    UPB_ASSERT_VAR(success, success);  // Failure is not otherwise reported.
+  }
+  return e;
+}
+
+static upb::MessageDef* NewMessageDef(const goog::Message& m, void *owner,
+                                      me::Defs* defs) {
+  upb::MessageDef* md = upb::MessageDef::New(owner);
+  md->set_full_name(m.GetDescriptor()->full_name());
+
+  // Must do this before processing submessages to prevent infinite recursion.
+  defs->AddMessage(&m, md);
+
+  const goog::Descriptor* d = m.GetDescriptor();
+  for (int i = 0; i < d->field_count(); i++) {
+    const goog::FieldDescriptor* proto2_f = d->field(i);
+
+#ifdef UPB_GOOGLE3
+    // Skip lazy fields for now since we can't properly handle them.
+    if (proto2_f->options().lazy()) continue;
+#endif
+    // Extensions not supported yet.
+    if (proto2_f->is_extension()) continue;
+
+    const goog::Message* subm_prototype;  // Set only for message fields.
+    upb::FieldDef* f = AddFieldDef(m, proto2_f, md, &subm_prototype);
+
+    if (!f->HasSubDef()) continue;  // No subdef to resolve for this field.
+
+    upb::Def* subdef = defs->FindSymbol(f->subdef_name());  // Reuse if built.
+    if (!subdef) {
+      if (f->type() == UPB_TYPE(ENUM)) {
+        subdef = NewEnumDef(proto2_f->enum_type(), owner)->Upcast();
+        defs->AddSymbol(subdef->full_name(), subdef);
+      } else {
+        assert(f->IsSubMessage());
+        assert(subm_prototype);
+        subdef = NewMessageDef(*subm_prototype, owner, defs)->Upcast();
+      }  // The recursive call registers the new msgdef via AddMessage().
+    }
+    f->set_subdef(subdef);
+  }
+
+  return md;
+}
+
+const upb::Handlers* NewWriteHandlers(const goog::Message& m, void *owner) {
+  me::Defs defs;  // Scratch bookkeeping; lives only for this call.
+  const upb::MessageDef* md = NewMessageDef(m, owner, &defs);
+
+  std::vector<upb::Def*> defs_vec;
+  defs.Flatten(&defs_vec);  // Every def recorded in defs' symbol map.
+  Status status;
+  bool success = Def::Freeze(defs_vec, &status);
+  UPB_ASSERT_VAR(success, success);
+
+  const upb::Handlers* ret =  // NOTE(review): closure &defs is a stack local.
+      upb::Handlers::NewFrozen(md, owner, me::Defs::StaticOnMessage, &defs);
+
+  // Unref all defs, since they're now ref'd by the handlers.
+  for (int i = 0; i < static_cast<int>(defs_vec.size()); i++) {
+    defs_vec[i]->Unref(owner);
+  }
+
+  return ret;
+}
+
+}  // namespace google
+}  // namespace upb
--- a/upb/google/bridge.h
+++ b/upb/google/bridge.h
@ -0,0 +1,76 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+//
+// This file contains functionality for constructing upb Defs and Handlers
+// corresponding to proto2 messages.  Using this functionality, you can use upb
+// to dynamically generate parsing code that can behave exactly like proto2's
+// generated parsing code.  Alternatively, you can configure things to
+// read/write only a subset of the fields for higher performance when only some
+// fields are needed.
+//
+// Example usage (FIX XXX):
+//
+//   // Build a def that will have all fields and parse just like proto2 would.
+//   const upb::MessageDef* md = upb::proto2_bridge::NewMessageDef(&MyProto());
+//
+//   // JIT the parser; should only be done once ahead-of-time.
+//   upb::Handlers* handlers = upb::NewHandlersForMessage(md);
+//   upb::DecoderPlan* plan = upb::DecoderPlan::New(handlers);
+//   handlers->Unref();
+//
+//   // The actual parsing.
+//   MyProto proto;
+//   upb::Decoder decoder;
+//   upb::StringSource source(buf, len);
+//   decoder.ResetPlan(plan, 0);
+//   decoder.ResetInput(source.AllBytes(), &proto);
+//   CHECK(decoder.Decode() == UPB_OK) << decoder.status();
+//
+// To parse only one field and skip all others:
+//
+//   const upb::MessageDef* md =
+//       upb::proto2_bridge::NewEmptyMessageDef(MyProto().GetPrototype());
+//   upb::proto2_bridge::AddFieldDef(
+//       MyProto::descriptor()->FindFieldByName("my_field"), md);
+//   upb::Freeze(md);
+//
+//   // Now continue with "JIT the parser" from above.
+//
+// Note that there is currently no support for
+// CodedInputStream::SetExtensionRegistry(), which allows specifying a separate
+// DescriptorPool and MessageFactory for extensions.  Since this is a property
+// of the input in proto2, it's difficult to build a plan ahead-of-time that
+// can properly support this.  If it's an important use case, the caller should
+// probably build a upb plan explicitly.
+
+#ifndef UPB_GOOGLE_BRIDGE_H_
+#define UPB_GOOGLE_BRIDGE_H_
+
+namespace google {
+namespace protobuf { class Message; }
+}  // namespace google
+
+namespace proto2 { class Message; }
+
+namespace upb {
+
+class Handlers;
+
+namespace google {
+
+// Returns a upb::Handlers object that can be used to populate a proto2::Message
+// object of the same type as "m."  "owner" becomes the owner of the result.
+//
+// TODO(haberman): Add handler caching functionality so that we don't use
+// O(n^2) memory in the worst case when incrementally building handlers.
+const upb::Handlers* NewWriteHandlers(const proto2::Message& m, void *owner);
+const upb::Handlers* NewWriteHandlers(const ::google::protobuf::Message& m,
+                                      void *owner);
+
+}  // namespace google
+}  // namespace upb
+
+#endif  // UPB_GOOGLE_BRIDGE_H_
--- a/upb/google/cord.h
+++ b/upb/google/cord.h
@ -0,0 +1,48 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+//
+// Functionality for interoperating with Cord.  Only needed inside Google.
+
+#ifndef UPB_GOOGLE_CORD_H
+#define UPB_GOOGLE_CORD_H
+
+#include "strings/cord.h"
+#include "upb/bytestream.h"
+
+namespace upb {
+
+namespace proto2_bridge_google3 { class FieldAccessor; }
+namespace proto2_bridge_opensource { class FieldAccessor; }
+
+namespace google {
+
+class P2R_Handlers;
+
+// Private helpers for moving upb byte data into a Cord.  Everything here is
+// reachable only by the friend classes listed at the bottom.
+class CordSupport {
+ private:
+  UPB_DISALLOW_POD_OPS(CordSupport);
+
+  // Replaces the contents of "cord" with the bytes of region "r", appending
+  // one buffer returned by GetPtr() at a time.
+  inline static void AssignToCord(const upb::ByteRegion* r, Cord* cord) {
+    // TODO(haberman): ref source data if source is a cord.
+    cord->Clear();
+    for (uint64_t pos = r->start_ofs(); pos < r->end_ofs(); ) {
+      size_t chunk_len;
+      const char *chunk = r->GetPtr(pos, &chunk_len);
+      cord->Append(StringPiece(chunk, chunk_len));
+      pos += chunk_len;
+    }
+  }
+
+  friend class ::upb::proto2_bridge_google3::FieldAccessor;
+  friend class ::upb::proto2_bridge_opensource::FieldAccessor;
+  friend class P2R_Handlers;
+};
+
+}  // namespace google
+}  // namespace upb
+
+#endif  // UPB_GOOGLE_CORD_H
--- a/upb/google/proto1.cc
+++ b/upb/google/proto1.cc
@ -0,0 +1,502 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+//
+// This set of handlers can write into a proto2::Message whose reflection class
+// is _pi::Proto2Reflection (ie. proto1 messages; while slightly confusing, the
+// name "Proto2Reflection" indicates that it is a reflection class implementing
+// the proto2 reflection interface, but is used for proto1 generated messages).
+//
+// Like FieldAccessor this depends on breaking encapsulation, and will need to
+// be changed if and when the details of _pi::Proto2Reflection change.
+//
+// Note that we have received an exception from c-style-arbiters regarding
+// dynamic_cast<> in this file:
+// https://groups.google.com/a/google.com/d/msg/c-style/7Zp_XCX0e7s/I6dpzno4l-MJ
+
+#include "upb/google/proto1.h"
+
+// TODO(haberman): friend upb so that this isn't required.
+// Expose protected members while including this one header only; the macro
+// must be removed again right away so later includes are unaffected.
+#define protected public
+#include "net/proto2/public/repeated_field.h"
+#undef protected
+
+// TODO(haberman): friend upb so that this isn't required.
+#define private public
+#include "net/proto/proto2_reflection.h"
+#undef private
+
+#include "net/proto/internal_layout.h"
+#include "upb/bytestream.h"
+#include "upb/def.h"
+#include "upb/google/cord.h"
+#include "upb/handlers.h"
+
+// Returns the address "offset" bytes past "message", viewed as a T*.
+template<class T> static T* GetPointer(void *message, size_t offset) {
+  char* base = static_cast<char*>(message);
+  return reinterpret_cast<T*>(base + offset);
+}
+
+namespace upb {
+namespace google {
+
+class P2R_Handlers {
+ public:
+  // Tries to install handlers on "h" that write field "proto2_f"/"upb_f" into
+  // a proto1 message of the same type as "m".  The handlers are selected by
+  // the field's crep as reported by the message's _pi::Proto2Reflection.
+  //
+  // Returns false (setting nothing) if "m" does not use
+  // _pi::Proto2Reflection or if the field is an extension; returns true once
+  // handlers for a recognized crep have been set.
+  static bool TrySet(const proto2::FieldDescriptor* proto2_f,
+                     const proto2::Message& m,
+                     const upb::FieldDef* upb_f, upb::Handlers* h) {
+    // See file comment re: dynamic_cast.
+    const _pi::Proto2Reflection* r =
+        dynamic_cast<const _pi::Proto2Reflection*>(m.GetReflection());
+    if (r == NULL) return false;
+    // Extensions not supported yet.
+    if (proto2_f->is_extension()) return false;
+
+    switch (r->GetFieldLayout(proto2_f)->crep) {
+      // Required, optional and repeated variants of a primitive share one
+      // templated setter.
+#define PRIMITIVE(name, type_name) \
+      case _pi::CREP_REQUIRED_ ## name: \
+      case _pi::CREP_OPTIONAL_ ## name: \
+      case _pi::CREP_REPEATED_ ## name: \
+        SetPrimitiveHandlers<type_name>(proto2_f, r, upb_f, h); \
+        return true;
+      PRIMITIVE(DOUBLE,   double);
+      PRIMITIVE(FLOAT,    float);
+      PRIMITIVE(INT64,    int64_t);
+      PRIMITIVE(UINT64,   uint64_t);
+      PRIMITIVE(INT32,    int32_t);
+      PRIMITIVE(FIXED64,  uint64_t);
+      PRIMITIVE(FIXED32,  uint32_t);
+      PRIMITIVE(BOOL,     bool);
+#undef PRIMITIVE
+
+      case _pi::CREP_REQUIRED_STRING:
+      case _pi::CREP_OPTIONAL_STRING:
+      case _pi::CREP_REPEATED_STRING:
+        SetStringHandlers(proto2_f, r, upb_f, h);
+        return true;
+
+      case _pi::CREP_OPTIONAL_OUTOFLINE_STRING:
+        SetOutOfLineStringHandlers(proto2_f, r, upb_f, h);
+        return true;
+
+      case _pi::CREP_REQUIRED_CORD:
+      case _pi::CREP_OPTIONAL_CORD:
+      case _pi::CREP_REPEATED_CORD:
+        SetCordHandlers(proto2_f, r, upb_f, h);
+        return true;
+
+      case _pi::CREP_REQUIRED_GROUP:
+      case _pi::CREP_REQUIRED_FOREIGN:
+      case _pi::CREP_REQUIRED_FOREIGN_PROTO2:
+        SetRequiredMessageHandlers(proto2_f, m, r, upb_f, h);
+        return true;
+
+      case _pi::CREP_OPTIONAL_GROUP:
+      case _pi::CREP_REPEATED_GROUP:
+      case _pi::CREP_OPTIONAL_FOREIGN:
+      case _pi::CREP_REPEATED_FOREIGN:
+      case _pi::CREP_OPTIONAL_FOREIGN_PROTO2:
+      case _pi::CREP_REPEATED_FOREIGN_PROTO2:
+        SetMessageHandlers(proto2_f, m, r, upb_f, h);
+        return true;
+
+      case _pi::CREP_OPTIONAL_FOREIGN_WEAK:
+      case _pi::CREP_OPTIONAL_FOREIGN_WEAK_PROTO2:
+        SetWeakMessageHandlers(proto2_f, m, r, upb_f, h);
+        return true;
+
+      default:
+        // Unknown crep: trips the assert in debug builds, fails otherwise.
+        assert(false);
+        return false;
+    }
+  }
+
+  // For a weak field "f" of message "m", returns the submessage prototype
+  // (which may be a specific type or may be OpaqueMessage).  Returns NULL if
+  // the field is not weak or if "m" does not use _pi::Proto2Reflection.
+  static const proto2::Message* GetWeakPrototype(
+      const proto2::Message& m,
+      const proto2::FieldDescriptor* f) {
+    // See file comment re: dynamic_cast.
+    const _pi::Proto2Reflection* r =
+        dynamic_cast<const _pi::Proto2Reflection*>(m.GetReflection());
+    if (r == NULL) return NULL;
+
+    const _pi::Field* layout = r->GetFieldLayout(f);
+    switch (layout->crep) {
+      case _pi::CREP_OPTIONAL_FOREIGN_WEAK:
+        return static_cast<const proto2::Message*>(
+            layout->weak_layout()->default_instance);
+      case _pi::CREP_OPTIONAL_FOREIGN_WEAK_PROTO2:
+        return layout->proto2_weak_default_instance();
+      default:
+        return NULL;
+    }
+  }
+
+  // Returns the submessage prototype for field "f" of "m" when "m" uses
+  // Proto2Reflection: the weak prototype if GetWeakPrototype() yields one
+  // (possibly OpaqueMessage for a weak field that is not linked in),
+  // otherwise the prototype from the generated factory.  Returns NULL when
+  // "m" does not use Proto2Reflection.
+  static const proto2::Message* GetFieldPrototype(
+      const proto2::Message& m,
+      const proto2::FieldDescriptor* f) {
+    const proto2::Message* weak = GetWeakPrototype(m, f);
+    if (weak) return weak;
+
+    // See file comment re: dynamic_cast.
+    if (!dynamic_cast<const _pi::Proto2Reflection*>(m.GetReflection())) {
+      return NULL;
+    }
+
+    // Since proto1 has no dynamic message, it must be from the generated
+    // factory.
+    assert(f->cpp_type() == proto2::FieldDescriptor::CPPTYPE_MESSAGE);
+    const proto2::Message* generated =
+        proto2::MessageFactory::generated_factory()->GetPrototype(
+            f->message_type());
+    assert(generated);
+    return generated;
+  }
+
+ private:
+  // Records where a field lives inside a message object: the byte offset of
+  // its storage and, for non-repeated fields only, the byte offset and bit
+  // mask of its hasbit.  All values are computed once in the constructor.
+  class FieldOffset {
+   public:
+    FieldOffset(
+        const proto2::FieldDescriptor* f,
+        const _pi::Proto2Reflection* r)
+        : offset_(GetOffset(f, r)),
+          is_repeated_(f->is_repeated()),
+          hasbyte_(0),
+          mask_(0) {
+      if (!is_repeated_) {
+        int64_t hasbit = GetHasbit(f, r);
+        hasbyte_ = static_cast<int32_t>(hasbit / 8);
+        // Built as unsigned so that bit 7 (0x80) is representable.
+        mask_ = static_cast<uint8_t>(1u << (hasbit % 8));
+      }
+    }
+
+    // Returns a pointer to this field's storage inside "message".
+    template<class T> T* GetFieldPointer(void* message) const {
+      return GetPointer<T>(message, offset_);
+    }
+
+    // Sets this field's hasbit inside "message".  Non-repeated fields only.
+    void SetHasbit(void* message) const {
+      assert(!is_repeated_);
+      uint8_t* byte = GetPointer<uint8_t>(message, hasbyte_);
+      *byte |= mask_;
+    }
+
+   private:
+    const size_t offset_;
+    bool is_repeated_;
+
+    // Only meaningful for non-repeated fields (zero otherwise).
+    int32_t hasbyte_;
+    uint8_t mask_;
+  };
+
+  // Looks up the selector for handler "type" on field "f" via
+  // upb::Handlers::GetSelector(); the lookup is asserted to succeed.
+  static upb_selector_t GetSelector(const upb::FieldDef* f,
+                                    upb::Handlers::Type type) {
+    upb::Handlers::Selector sel;
+    bool found = upb::Handlers::GetSelector(f, type, &sel);
+    UPB_ASSERT_VAR(found, found);
+    return sel;
+  }
+
+
+  // Returns the index of the hasbit for non-repeated field "f", counted in
+  // bits from the start of the message: the layout's has_bit_offset (bytes)
+  // times 8, plus the field's has_index.  Computed and returned as int64_t
+  // so that large bit indices are not truncated.
+  static int64_t GetHasbit(const proto2::FieldDescriptor* f,
+                           const _pi::Proto2Reflection* r) {
+    assert(!f->is_repeated());
+    const int64_t hasbits_start =
+        static_cast<int64_t>(r->layout_->has_bit_offset) * 8;
+    return hasbits_start + r->GetFieldLayout(f)->has_index;
+  }
+
+  // Returns the offset recorded in the reflection's field layout for "f".
+  static uint16_t GetOffset(const proto2::FieldDescriptor* f,
+                            const _pi::Proto2Reflection* r) {
+    const _pi::Field* layout = r->GetFieldLayout(f);
+    return layout->offset;
+  }
+
+  // StartSequence /////////////////////////////////////////////////////////////
+
+  // Registers PushOffset as the start-sequence handler for repeated field
+  // "f".  The handler data is a newly allocated FieldOffset, passed together
+  // with upb::DeletePointer<FieldOffset> as its cleanup function.
+  static void SetStartSequenceHandler(
+      const proto2::FieldDescriptor* proto2_f, const _pi::Proto2Reflection* r,
+      const upb::FieldDef* f, upb::Handlers* h) {
+    assert(f->IsSequence());
+    FieldOffset* seq_offset = new FieldOffset(proto2_f, r);
+    h->SetStartSequenceHandler(
+        f, &PushOffset, seq_offset, &upb::DeletePointer<FieldOffset>);
+  }
+
+  // Start-sequence handler: returns the address of the field described by
+  // the FieldOffset in "fval" inside message "m".
+  static void* PushOffset(void *m, void *fval) {
+    const FieldOffset* field = static_cast<FieldOffset*>(fval);
+    void* field_ptr = field->GetFieldPointer<void>(m);
+    return field_ptr;
+  }
+
+  // Primitive Value (numeric, enum, bool) /////////////////////////////////////
+
+  // Installs value handlers of type T for field "f".  Non-repeated fields
+  // use upb::SetStoreValueHandler with the field's offset and hasbit;
+  // repeated fields get a start-sequence handler plus Append<T>.
+  template <typename T> static void SetPrimitiveHandlers(
+      const proto2::FieldDescriptor* proto2_f,
+      const _pi::Proto2Reflection* r,
+      const upb::FieldDef* f, upb::Handlers* h) {
+    if (!f->IsSequence()) {
+      upb::SetStoreValueHandler<T>(
+          f, GetOffset(proto2_f, r), GetHasbit(proto2_f, r), h);
+      return;
+    }
+    SetStartSequenceHandler(proto2_f, r, f, h);
+    h->SetValueHandler<T>(f, &Append<T>, NULL, NULL);
+  }
+
+  // Value handler for repeated primitives: adds "val" to the RepeatedField<T>
+  // passed as the closure.  Always returns true.
+  template <typename T>
+  static bool Append(void *_r, void *fval, T val) {
+    UPB_UNUSED(fval);
+    // Proto1's ProtoArray class derives from proto2::RepeatedField.
+    typedef proto2::RepeatedField<T> Array;
+    static_cast<Array*>(_r)->Add(val);
+    return true;
+  }
+
+  // String ////////////////////////////////////////////////////////////////////
+
+  // Installs string handlers for field "f".  OnStringBuf receives the data in
+  // both cases; the start-string handler is StartString (with a FieldOffset
+  // as handler data) for a non-repeated field and StartRepeatedString for a
+  // repeated one.
+  static void SetStringHandlers(
+      const proto2::FieldDescriptor* proto2_f,
+      const _pi::Proto2Reflection* r,
+      const upb::FieldDef* f, upb::Handlers* h) {
+    h->SetStringHandler(f, &OnStringBuf, NULL, NULL);
+    if (!f->IsSequence()) {
+      h->SetStartStringHandler(
+          f, &StartString, new FieldOffset(proto2_f, r),
+          &upb::DeletePointer<FieldOffset>);
+      return;
+    }
+    SetStartSequenceHandler(proto2_f, r, f, h);
+    h->SetStartStringHandler(f, &StartRepeatedString, NULL, NULL);
+  }
+
+  // Start-string handler for a non-repeated inline string: sets the hasbit,
+  // clears the string stored in the field and returns it as the closure for
+  // the data callbacks.
+  static void* StartString(void *m, void *fval, size_t size_hint) {
+    UPB_UNUSED(size_hint);
+    const FieldOffset* field = static_cast<const FieldOffset*>(fval);
+    field->SetHasbit(m);
+    string* target = field->GetFieldPointer<string>(m);
+    // reserve() here appears to hurt performance rather than help.
+    target->clear();
+    return target;
+  }
+
+  // String data handler: appends the "n" bytes at "buf" to the string passed
+  // as the closure and returns "n".
+  static size_t OnStringBuf(void *_s, void *fval, const char *buf, size_t n) {
+    UPB_UNUSED(fval);  // No handler data is registered for this callback.
+    string* s = static_cast<string*>(_s);
+    s->append(buf, n);
+    return n;
+  }
+
+  // Start-string handler for a repeated string: adds a new element to the
+  // RepeatedPtrField<string> passed as the closure and returns it as the
+  // closure for the data callbacks.
+  static void* StartRepeatedString(void *_r, void *fval, size_t size_hint) {
+    UPB_UNUSED(fval);
+    UPB_UNUSED(size_hint);
+    proto2::RepeatedPtrField<string>* r =
+        static_cast<proto2::RepeatedPtrField<string>*>(_r);
+    string* str = r->Add();
+    // reserve() here appears to hurt performance rather than help.
+    return str;
+  }
+
+  // Out-of-line string ////////////////////////////////////////////////////////
+
+  // Installs handlers for a field whose storage is a pointer to a string
+  // (out-of-line): StartOutOfLineString with a FieldOffset as handler data,
+  // and OnStringBuf for the bytes.
+  static void SetOutOfLineStringHandlers(
+      const proto2::FieldDescriptor* proto2_f,
+      const _pi::Proto2Reflection* r,
+      const upb::FieldDef* f, upb::Handlers* h) {
+    // This type is only used for non-repeated string fields.
+    assert(!f->IsSequence());
+    FieldOffset* field = new FieldOffset(proto2_f, r);
+    h->SetStartStringHandler(
+        f, &StartOutOfLineString, field, &upb::DeletePointer<FieldOffset>);
+    h->SetStringHandler(f, &OnStringBuf, NULL, NULL);
+  }
+
+  // Start-string handler for an out-of-line string: sets the hasbit, replaces
+  // the shared empty-string pointer with a newly allocated string if the
+  // field still points at it, then clears and returns the string.
+  static void* StartOutOfLineString(void *m, void *fval, size_t size_hint) {
+    UPB_UNUSED(size_hint);
+    const FieldOffset* info = static_cast<const FieldOffset*>(fval);
+    info->SetHasbit(m);
+    string **str = info->GetFieldPointer<string*>(m);
+    if (*str == &::ProtocolMessage::___empty_internal_proto_string_)
+      *str = new string();
+    (*str)->clear();
+    // reserve() here appears to hurt performance rather than help.
+    return *str;
+  }
+
+  // Cord //////////////////////////////////////////////////////////////////////
+
+  // Installs handlers for a Cord-backed string field.  OnCordBuf receives the
+  // data in both cases; the start-string handler is StartCord (with a
+  // FieldOffset as handler data) for a non-repeated field and
+  // StartRepeatedCord for a repeated one.
+  static void SetCordHandlers(
+      const proto2::FieldDescriptor* proto2_f,
+      const _pi::Proto2Reflection* r,
+      const upb::FieldDef* f, upb::Handlers* h) {
+    h->SetStringHandler(f, &OnCordBuf, NULL, NULL);
+    if (f->IsSequence()) {
+      SetStartSequenceHandler(proto2_f, r, f, h);
+      h->SetStartStringHandler(f, &StartRepeatedCord, NULL, NULL);
+    } else {
+      // The deleter's type must match the allocated object (a FieldOffset,
+      // not a FieldOffset*).
+      h->SetStartStringHandler(
+          f, &StartCord, new FieldOffset(proto2_f, r),
+          &upb::DeletePointer<FieldOffset>);
+    }
+  }
+
+  // Start-string handler for a non-repeated Cord field: sets the hasbit,
+  // clears the Cord stored in the field and returns it as the closure for
+  // the data callbacks.
+  static void* StartCord(void *m, void *fval, size_t size_hint) {
+    UPB_UNUSED(size_hint);
+    // "fval" carries the FieldOffset registered by SetCordHandlers.
+    const FieldOffset* offset = static_cast<const FieldOffset*>(fval);
+    offset->SetHasbit(m);
+    Cord* field = offset->GetFieldPointer<Cord>(m);
+    field->Clear();
+    return field;
+  }
+
+  // String data handler for Cord fields: appends the "n" bytes at "buf" to
+  // the Cord passed as the closure and returns "n", matching OnStringBuf.
+  static size_t OnCordBuf(void *_c, void *fval, const char *buf, size_t n) {
+    UPB_UNUSED(fval);
+    Cord* c = static_cast<Cord*>(_c);
+    c->Append(StringPiece(buf, n));
+    // Report the byte count, not a boolean (which would convert to 1).
+    return n;
+  }
+
+  // Start-string handler for a repeated Cord field: adds an element to the
+  // repeated field passed as the closure and returns it.
+  static void* StartRepeatedCord(void *_r, void *fval, size_t size_hint) {
+    UPB_UNUSED(size_hint);
+    UPB_UNUSED(fval);
+    typedef proto2::RepeatedField<Cord> CordArray;
+    CordArray* cords = static_cast<CordArray*>(_r);
+    return cords->Add();
+  }
+
+  // SubMessage ////////////////////////////////////////////////////////////////
+
+  // Handler data for submessage fields: a FieldOffset plus the prototype of
+  // the submessage.  The prototype is the weak prototype when
+  // GetWeakPrototype() yields one, otherwise whatever GetFieldPrototype()
+  // returns.
+  class SubMessageHandlerData : public FieldOffset {
+   public:
+    SubMessageHandlerData(
+        const proto2::Message& prototype,
+        const proto2::FieldDescriptor* f,
+        const _pi::Proto2Reflection* r)
+        : FieldOffset(f, r),
+          prototype_(GetWeakPrototype(prototype, f)) {
+      if (prototype_ == NULL) {
+        prototype_ = GetFieldPrototype(prototype, f);
+      }
+    }
+
+    const proto2::Message* prototype() const { return prototype_; }
+
+   private:
+    const proto2::Message* prototype_;
+  };
+
+  // Registers "handler" as the start-submessage handler for "f".  The
+  // handler data is a newly allocated SubMessageHandlerData, passed together
+  // with upb::DeletePointer<SubMessageHandlerData> as its cleanup function.
+  static void SetStartSubMessageHandler(
+      const proto2::FieldDescriptor* proto2_f,
+      const proto2::Message& m,
+      const _pi::Proto2Reflection* r,
+      upb::Handlers::StartFieldHandler* handler,
+      const upb::FieldDef* f, upb::Handlers* h) {
+    SubMessageHandlerData* data = new SubMessageHandlerData(m, proto2_f, r);
+    h->SetStartSubMessageHandler(
+        f, handler, data, &upb::DeletePointer<SubMessageHandlerData>);
+  }
+
+  // Installs submessage handlers for the REQUIRED_* message creps.  A
+  // non-repeated field uses StartRequiredSubMessage with a plain FieldOffset;
+  // a repeated field uses the start-sequence handler plus
+  // StartRepeatedSubMessage.
+  static void SetRequiredMessageHandlers(
+      const proto2::FieldDescriptor* proto2_f,
+      const proto2::Message& m,
+      const _pi::Proto2Reflection* r,
+      const upb::FieldDef* f, upb::Handlers* h) {
+    if (!f->IsSequence()) {
+      h->SetStartSubMessageHandler(
+          f, &StartRequiredSubMessage, new FieldOffset(proto2_f, r),
+          &upb::DeletePointer<FieldOffset>);
+      return;
+    }
+    SetStartSequenceHandler(proto2_f, r, f, h);
+    SetStartSubMessageHandler(proto2_f, m, r, &StartRepeatedSubMessage, f, h);
+  }
+
+  // Start-submessage handler for a required submessage: sets the hasbit and
+  // returns the address of the field itself inside "m".
+  static void* StartRequiredSubMessage(void *m, void *fval) {
+    const FieldOffset* field = static_cast<FieldOffset*>(fval);
+    field->SetHasbit(m);
+    void* submsg = field->GetFieldPointer<void>(m);
+    return submsg;
+  }
+
+  // Installs submessage handlers for the optional/repeated message creps:
+  // StartSubMessage for a non-repeated field, or the start-sequence handler
+  // plus StartRepeatedSubMessage for a repeated one.
+  static void SetMessageHandlers(
+      const proto2::FieldDescriptor* proto2_f,
+      const proto2::Message& m,
+      const _pi::Proto2Reflection* r,
+      const upb::FieldDef* f, upb::Handlers* h) {
+    if (!f->IsSequence()) {
+      SetStartSubMessageHandler(proto2_f, m, r, &StartSubMessage, f, h);
+      return;
+    }
+    SetStartSequenceHandler(proto2_f, r, f, h);
+    SetStartSubMessageHandler(proto2_f, m, r, &StartRepeatedSubMessage, f, h);
+  }
+
+  // Installs submessage handlers for the weak message creps:
+  // StartWeakSubMessage for a non-repeated field, or the start-sequence
+  // handler plus StartRepeatedSubMessage for a repeated one.
+  static void SetWeakMessageHandlers(
+      const proto2::FieldDescriptor* proto2_f,
+      const proto2::Message& m,
+      const _pi::Proto2Reflection* r,
+      const upb::FieldDef* f, upb::Handlers* h) {
+    if (!f->IsSequence()) {
+      SetStartSubMessageHandler(proto2_f, m, r, &StartWeakSubMessage, f, h);
+      return;
+    }
+    SetStartSequenceHandler(proto2_f, r, f, h);
+    SetStartSubMessageHandler(proto2_f, m, r, &StartRepeatedSubMessage, f, h);
+  }
+
+  // Start-submessage handler for a non-repeated submessage stored as a
+  // pointer: sets the hasbit and, if the pointer still equals the prototype,
+  // replaces it with a fresh message from New().  Returns the submessage.
+  static void* StartSubMessage(void *m, void *fval) {
+    const SubMessageHandlerData* data =
+        static_cast<const SubMessageHandlerData*>(fval);
+    data->SetHasbit(m);
+    proto2::Message*& slot = *data->GetFieldPointer<proto2::Message*>(m);
+    if (slot == data->prototype()) {
+      slot = slot->New();
+    }
+    return slot;
+  }
+
+  // Start-submessage handler for a weak submessage stored as a pointer: sets
+  // the hasbit and, if the pointer is NULL, fills it with a new message made
+  // from the prototype.  Returns the submessage.
+  static void* StartWeakSubMessage(void *m, void *fval) {
+    const SubMessageHandlerData* data =
+        static_cast<const SubMessageHandlerData*>(fval);
+    data->SetHasbit(m);
+    proto2::Message*& slot = *data->GetFieldPointer<proto2::Message*>(m);
+    if (!slot) slot = data->prototype()->New();
+    return slot;
+  }
+
+  // Type handler passed to RepeatedPtrFieldBase's AddFromCleared() and
+  // AddAllocated() templates so elements can be handled as untyped pointers.
+  class RepeatedMessageTypeHandler {
+   public:
+    typedef void Type;
+
+    // AddAllocated() calls this, but only if other objects are sitting
+    // around waiting for reuse, which we will not do.
+    static void Delete(Type* t) {
+      static_cast<void>(t);
+      assert(false);
+    }
+  };
+
+  // Start-submessage handler for repeated submessages.  The closure is a
+  // RepeatedPtrField<SubMessageType>*, accessed through its base class
+  // RepeatedPtrFieldBase*.  Takes an element from AddFromCleared() when one
+  // is returned; otherwise creates one from the prototype and adds it.
+  static void* StartRepeatedSubMessage(void* _r, void *fval) {
+    typedef proto2::internal::RepeatedPtrFieldBase Base;
+    const SubMessageHandlerData* data =
+        static_cast<const SubMessageHandlerData*>(fval);
+    Base* field = static_cast<Base*>(_r);
+
+    void *elem = field->AddFromCleared<RepeatedMessageTypeHandler>();
+    if (elem) return elem;
+
+    elem = data->prototype()->New();
+    field->AddAllocated<RepeatedMessageTypeHandler>(elem);
+    return elem;
+  }
+};
+
+// Public entry point; forwards to P2R_Handlers::TrySet().
+bool TrySetProto1WriteHandlers(const proto2::FieldDescriptor* proto2_f,
+                               const proto2::Message& m,
+                               const upb::FieldDef* upb_f, upb::Handlers* h) {
+  const bool handlers_set = P2R_Handlers::TrySet(proto2_f, m, upb_f, h);
+  return handlers_set;
+}
+
+// Public entry point; forwards to P2R_Handlers::GetWeakPrototype().
+const proto2::Message* GetProto1WeakPrototype(
+    const proto2::Message& m,
+    const proto2::FieldDescriptor* f) {
+  const proto2::Message* weak = P2R_Handlers::GetWeakPrototype(m, f);
+  return weak;
+}
+
+// Public entry point; forwards to P2R_Handlers::GetFieldPrototype().
+const proto2::Message* GetProto1FieldPrototype(
+    const proto2::Message& m,
+    const proto2::FieldDescriptor* f) {
+  const proto2::Message* prototype = P2R_Handlers::GetFieldPrototype(m, f);
+  return prototype;
+}
+
+}  // namespace google
+}  // namespace upb
--- a/upb/google/proto1.h
+++ b/upb/google/proto1.h
@ -0,0 +1,53 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+//
+// Support for registering field handlers that can write into a legacy proto1
+// message.  This functionality is only needed inside Google.
+//
+// This is a low-level interface; the high-level interface in bridge.h is
+// more user-friendly.
+
+#ifndef UPB_GOOGLE_PROTO1_H_
+#define UPB_GOOGLE_PROTO1_H_
+
+namespace proto2 {
+class FieldDescriptor;
+class Message;
+}
+
+namespace upb {
+class FieldDef;
+class Handlers;
+}
+
+namespace upb {
+namespace google {
+
+// Sets field handlers in the given Handlers object for writing to a single
+// field (as described by "proto2_f" and "upb_f") into a message constructed
+// by the same factory as "prototype."  Returns true if this was successful
+// (this will fail if "prototype" is not a proto1 message, or if we can't
+// handle the field, e.g. because it is an extension).
+bool TrySetProto1WriteHandlers(const proto2::FieldDescriptor* proto2_f,
+                               const proto2::Message& prototype,
+                               const upb::FieldDef* upb_f, upb::Handlers* h);
+
+// Returns a prototype for the given field in "m", if it is weak (either the
+// linked-in message type, or OpaqueMessage if the weak message is *not*
+// linked in).  Returns NULL for non-weak fields and non-proto1 messages.
+const proto2::Message* GetProto1WeakPrototype(
+    const proto2::Message& m,
+    const proto2::FieldDescriptor* f);
+
+// Returns the submessage prototype for field "f" of "m" (NULL if not proto1).
+const proto2::Message* GetProto1FieldPrototype(
+    const proto2::Message& m,
+    const proto2::FieldDescriptor* f);
+
+}  // namespace google
+}  // namespace upb
+
+#endif  // UPB_GOOGLE_PROTO1_H_
--- a/upb/google/proto2.cc
+++ b/upb/google/proto2.cc
@ -0,0 +1,632 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+//
+// Note that we have received an exception from c-style-arbiters regarding
+// dynamic_cast<> in this file:
+// https://groups.google.com/a/google.com/d/msg/c-style/7Zp_XCX0e7s/I6dpzno4l-MJ
+//
+// IMPORTANT NOTE!  This file is compiled TWICE, once with UPB_GOOGLE3 defined
+// and once without!  This allows us to provide functionality against proto2
+// and protobuf opensource both in a single binary without the two conflicting.
+// However we must be careful not to violate the ODR.
+
+#include "upb/google/proto2.h"
+
+#include "upb/google/proto1.h"
+#include "upb/bytestream.h"
+#include "upb/def.h"
+#include "upb/handlers.h"
+
+namespace upb {
+namespace proto2_bridge_google3 { class FieldAccessor; }
+namespace proto2_bridge_opensource { class FieldAccessor; }
+}  // namespace upb
+
+// BEGIN DOUBLE COMPILATION TRICKERY. //////////////////////////////////////////
+
+#ifdef UPB_GOOGLE3
+
+// TODO(haberman): friend upb so that this isn't required.
+#define protected public
+#include "net/proto2/public/repeated_field.h"
+#undef protected
+
+#define private public
+#include "net/proto2/public/generated_message_reflection.h"
+#undef private
+
+#include "net/proto2/proto/descriptor.pb.h"
+#include "net/proto2/public/descriptor.h"
+#include "net/proto2/public/lazy_field.h"
+#include "net/proto2/public/message.h"
+#include "net/proto2/public/string_piece_field_support.h"
+#include "upb/google/cord.h"
+
+namespace goog = ::proto2;
+namespace me = ::upb::proto2_bridge_google3;
+
+#else
+
+// TODO(haberman): friend upb so that this isn't required.
+#define protected public
+#include "google/protobuf/repeated_field.h"
+#undef protected
+
+#define private public
+#include "google/protobuf/generated_message_reflection.h"
+#undef private
+
+#include "google/protobuf/descriptor.h"
+#include "google/protobuf/descriptor.pb.h"
+#include "google/protobuf/message.h"
+
+namespace goog = ::google::protobuf;
+namespace me = ::upb::proto2_bridge_opensource;
+
+#endif  // ifdef UPB_GOOGLE3
+
+// END DOUBLE COMPILATION TRICKERY. ////////////////////////////////////////////
+
+// Have to define this manually since older versions of proto2 didn't define
+// an enum value for STRING.
+#define UPB_CTYPE_STRING 0
+
+// Returns the address "offset" bytes past "message", viewed as a T*.
+template<class T> static T* GetPointer(void *message, size_t offset) {
+  char* base = static_cast<char*>(message);
+  return reinterpret_cast<T*>(base + offset);
+}
+
+// This class contains handlers that can write into a proto2 class whose
+// reflection class is GeneratedMessageReflection.  (Despite the name, even
+// DynamicMessage uses GeneratedMessageReflection, so this covers all proto2
+// messages generated by the compiler.)  To do this it must break the
+// encapsulation of GeneratedMessageReflection and therefore depends on
+// internal interfaces that are not guaranteed to be stable.  This class will
+// need to be updated if any non-backward-compatible changes are made to
+// GeneratedMessageReflection.
+//
+// TODO(haberman): change class name?  In retrospect, "FieldAccessor" isn't the
+// best (something more specific like GeneratedMessageReflectionHandlers or
+// GMR_Handlers would be better) but we're depending on a "friend" declaration
+// in proto2 that already specifies "FieldAccessor."  No versions of proto2 have
+// been released that include the "friend FieldAccessor" declaration, so there's
+// still time to change this.  On the other hand, perhaps it's simpler to just
+// rely on "#define private public" since it may be a long time before new
+// versions of proto2 open source are pervasive enough that we can remove this
+// anyway.
+class me::FieldAccessor {
+ public:
+  // Tries to install handlers on "h" that write field "proto2_f"/"upb_f" into
+  // a message of the same type as "m", chosen by the field's C++ type.
+  //
+  // Returns false (setting nothing) if "m" does not use
+  // GeneratedMessageReflection, if the field is an extension, or if the
+  // field's type/ctype (or, under UPB_GOOGLE3, a lazy submessage) is not
+  // handled here.  Returns true once handlers have been set.
+  static bool TrySet(const goog::FieldDescriptor* proto2_f,
+                     const goog::Message& m,
+                     const upb::FieldDef* upb_f, upb::Handlers* h) {
+    // See file comment re: dynamic_cast.
+    const goog::internal::GeneratedMessageReflection* r =
+        dynamic_cast<const goog::internal::GeneratedMessageReflection*>(
+            m.GetReflection());
+    if (r == NULL) return false;
+    // Extensions not supported yet.
+    if (proto2_f->is_extension()) return false;
+
+    switch (proto2_f->cpp_type()) {
+#define PRIMITIVE_TYPE(cpptype, cident) \
+      case goog::FieldDescriptor::cpptype: \
+        SetPrimitiveHandlers<cident>(proto2_f, r, upb_f, h); \
+        return true;
+      PRIMITIVE_TYPE(CPPTYPE_INT32,  int32_t);
+      PRIMITIVE_TYPE(CPPTYPE_INT64,  int64_t);
+      PRIMITIVE_TYPE(CPPTYPE_UINT32, uint32_t);
+      PRIMITIVE_TYPE(CPPTYPE_UINT64, uint64_t);
+      PRIMITIVE_TYPE(CPPTYPE_DOUBLE, double);
+      PRIMITIVE_TYPE(CPPTYPE_FLOAT,  float);
+      PRIMITIVE_TYPE(CPPTYPE_BOOL,   bool);
+#undef PRIMITIVE_TYPE
+
+      case goog::FieldDescriptor::CPPTYPE_ENUM:
+        SetEnumHandlers(proto2_f, r, upb_f, h);
+        return true;
+
+      case goog::FieldDescriptor::CPPTYPE_STRING: {
+        // Old versions of the open-source protobuf release erroneously default
+        // to Cord even though that has never been supported in the open-source
+        // release.
+        int32_t ctype = UPB_CTYPE_STRING;
+        if (proto2_f->options().has_ctype()) {
+          ctype = proto2_f->options().ctype();
+        }
+        switch (ctype) {
+#ifdef UPB_GOOGLE3
+          case goog::FieldOptions::STRING:
+            SetStringHandlers<string>(proto2_f, m, r, upb_f, h);
+            return true;
+          case goog::FieldOptions::CORD:
+            SetCordHandlers(proto2_f, r, upb_f, h);
+            return true;
+          case goog::FieldOptions::STRING_PIECE:
+            SetStringPieceHandlers(proto2_f, r, upb_f, h);
+            return true;
+#else
+          case UPB_CTYPE_STRING:
+            SetStringHandlers<std::string>(proto2_f, m, r, upb_f, h);
+            return true;
+#endif
+          default:
+            return false;
+        }
+      }
+
+      case goog::FieldDescriptor::CPPTYPE_MESSAGE:
+#ifdef UPB_GOOGLE3
+        // Lazy submessages are not yet implemented.
+        if (proto2_f->options().lazy()) return false;
+#endif
+        SetSubMessageHandlers(proto2_f, m, r, upb_f, h);
+        return true;
+
+      default:
+        return false;
+    }
+  }
+
+  // Returns the prototype for the message type of field "f", obtained from
+  // the message factory held by the reflection object of "m".  Returns NULL
+  // if "m" does not use GeneratedMessageReflection.
+  static const goog::Message* GetFieldPrototype(
+      const goog::Message& m,
+      const goog::FieldDescriptor* f) {
+    // We assume that all submessages (and extensions) will be constructed
+    // using the same MessageFactory as this message.  This doesn't cover the
+    // case of CodedInputStream::SetExtensionRegistry().
+    const goog::Reflection* base_r = m.GetReflection();
+    // See file comment re: dynamic_cast.
+    const goog::internal::GeneratedMessageReflection* r =
+        dynamic_cast<const goog::internal::GeneratedMessageReflection*>(base_r);
+    return r ? r->message_factory_->GetPrototype(f->message_type()) : NULL;
+  }
+
+ private:
+  // Looks up the selector for handler "type" on field "f" via
+  // upb::Handlers::GetSelector(); the lookup is asserted to succeed.
+  static upb_selector_t GetSelector(const upb::FieldDef* f,
+                                    upb::Handlers::Type type) {
+    upb::Handlers::Selector sel;
+    bool found = upb::Handlers::GetSelector(f, type, &sel);
+    UPB_ASSERT_VAR(found, found);
+    return sel;
+  }
+
+  // Returns the index of the hasbit for non-repeated field "f", counted in
+  // bits: the reflection's has_bits_offset_ times 8, plus the field's index.
+  static int64_t GetHasbit(
+      const goog::FieldDescriptor* f,
+      const goog::internal::GeneratedMessageReflection* r) {
+    // proto2 does not store hasbits for repeated fields.
+    assert(!f->is_repeated());
+    const int64_t hasbits_start = r->has_bits_offset_ * 8;
+    return hasbits_start + f->index();
+  }
+
+  // Returns the entry for "f" in the reflection's offsets_ table.
+  static uint16_t GetOffset(
+      const goog::FieldDescriptor* f,
+      const goog::internal::GeneratedMessageReflection* r) {
+    const int field_index = f->index();
+    return r->offsets_[field_index];
+  }
+
+  // Handler data that records where a field lives inside a message: the
+  // field's offset and, for non-repeated fields, the byte and bit mask of
+  // its hasbit.
+  class FieldOffset {
+   public:
+    FieldOffset(
+        const goog::FieldDescriptor* f,
+        const goog::internal::GeneratedMessageReflection* r)
+        : offset_(GetOffset(f, r)),
+          is_repeated_(f->is_repeated()),
+          hasbyte_(0),
+          mask_(0) {
+      if (!is_repeated_) {
+        int64_t hasbit = GetHasbit(f, r);
+        hasbyte_ = hasbit / 8;
+        // The mask is kept unsigned so that bit 7 (value 128) is
+        // representable without an implementation-defined conversion.
+        mask_ = static_cast<uint8_t>(1u << (hasbit % 8));
+      }
+    }
+
+    // Returns a T* pointing at this field's storage inside "message".
+    template<class T> T* GetFieldPointer(void *message) const {
+      return GetPointer<T>(message, offset_);
+    }
+
+    // Sets this field's hasbit in message "m".  Only valid for non-repeated
+    // fields.
+    void SetHasbit(void* m) const {
+      assert(!is_repeated_);
+      uint8_t* byte = GetPointer<uint8_t>(m, hasbyte_);
+      *byte |= mask_;
+    }
+
+   private:
+    const size_t offset_;
+    bool is_repeated_;
+
+    // Only meaningful for non-repeated fields (zero otherwise).
+    int32_t hasbyte_;
+    uint8_t mask_;
+  };
+
+  // StartSequence /////////////////////////////////////////////////////////////
+
+  // Registers PushOffset as the start-sequence handler of sequence field "f".
+  // The handler data is a heap-allocated FieldOffset, released through
+  // upb::DeletePointer<FieldOffset>.
+  static void SetStartSequenceHandler(
+      const goog::FieldDescriptor* proto2_f,
+      const goog::internal::GeneratedMessageReflection* r,
+      const upb::FieldDef* f, upb::Handlers* h) {
+    assert(f->IsSequence());
+    FieldOffset* offset = new FieldOffset(proto2_f, r);
+    h->SetStartSequenceHandler(
+        f, &PushOffset, offset, &upb::DeletePointer<FieldOffset>);
+  }
+
+  // Start-sequence handler: returns a pointer to the field described by the
+  // FieldOffset in "fval" inside message "m".
+  static void* PushOffset(void *m, void *fval) {
+    const FieldOffset* field = static_cast<FieldOffset*>(fval);
+    void* field_ptr = field->GetFieldPointer<void>(m);
+    return field_ptr;
+  }
+
+  // Primitive Value (numeric, bool) ///////////////////////////////////////////
+
+  // Installs the value handler for a primitive field of C++ type T.  A
+  // sequence field gets a start-sequence handler plus AppendPrimitive<T>;
+  // any other field gets upb's store-value handler, given the field's offset
+  // and hasbit.
+  template <typename T> static void SetPrimitiveHandlers(
+      const goog::FieldDescriptor* proto2_f,
+      const goog::internal::GeneratedMessageReflection* r,
+      const upb::FieldDef* f,
+      upb::Handlers* h) {
+    if (!f->IsSequence()) {
+      upb::SetStoreValueHandler<T>(
+          f, GetOffset(proto2_f, r), GetHasbit(proto2_f, r), h);
+      return;
+    }
+    SetStartSequenceHandler(proto2_f, r, f, h);
+    h->SetValueHandler<T>(f, &AppendPrimitive<T>, NULL, NULL);
+  }
+
+  // Value handler for repeated primitive fields.  The closure "_r" is the
+  // goog::RepeatedField<T> to append "val" to; "fval" is unused.
+  template <typename T>
+  static bool AppendPrimitive(void *_r, void *fval, T val) {
+    UPB_UNUSED(fval);
+    goog::RepeatedField<T>* values = static_cast<goog::RepeatedField<T>*>(_r);
+    values->Add(val);
+    return true;
+  }
+
+  // Enum //////////////////////////////////////////////////////////////////////
+
+  // Handler data for enum fields: the field's location plus what is needed
+  // to validate a value against the enum definition and to record an
+  // unrecognized value in the message's unknown field set.
+  class EnumHandlerData : public FieldOffset {
+   public:
+    EnumHandlerData(
+        const goog::FieldDescriptor* proto2_f,
+        const goog::internal::GeneratedMessageReflection* r,
+        const upb::FieldDef* f)
+        : FieldOffset(proto2_f, r),
+          number_(f->number()),
+          unknown_offset_(r->unknown_fields_offset_),
+          enumdef_(upb_downcast_enumdef(f->subdef())) {
+    }
+
+    // True if the enum definition has a value with number "val".
+    bool IsValidValue(int32_t val) const {
+      return enumdef_->FindValueByNumber(val) != NULL;
+    }
+
+    int32_t field_number() const { return number_; }
+
+    // Returns the unknown field set stored inside message "m".
+    goog::UnknownFieldSet* mutable_unknown_fields(goog::Message* m) const {
+      return GetPointer<goog::UnknownFieldSet>(m, unknown_offset_);
+    }
+
+   private:
+    int32_t number_;
+    size_t unknown_offset_;
+    const upb::EnumDef* enumdef_;
+  };
+
+  // Installs the int32 handler for enum field "f": AppendEnum for sequence
+  // fields, SetEnum otherwise.  Both receive a heap-allocated
+  // EnumHandlerData, released through upb::DeletePointer<EnumHandlerData>.
+  static void SetEnumHandlers(
+      const goog::FieldDescriptor* proto2_f,
+      const goog::internal::GeneratedMessageReflection* r,
+      const upb::FieldDef* f,
+      upb::Handlers* h) {
+    EnumHandlerData* data = new EnumHandlerData(proto2_f, r, f);
+    bool (*handler)(void*, void*, int32_t) =
+        f->IsSequence() ? &AppendEnum : &SetEnum;
+    h->SetInt32Handler(
+        f, handler, data, &upb::DeletePointer<EnumHandlerData>);
+  }
+
+  // Value handler for non-repeated enum fields.  A value known to the enum
+  // definition is stored in the field and its hasbit is set; any other value
+  // is added to the unknown field set as a varint.  Always returns true.
+  static bool SetEnum(void *_m, void *fval, int32_t val) {
+    goog::Message* m = static_cast<goog::Message*>(_m);
+    const EnumHandlerData* data = static_cast<const EnumHandlerData*>(fval);
+    if (!data->IsValidValue(val)) {
+      data->mutable_unknown_fields(m)->AddVarint(data->field_number(), val);
+      return true;
+    }
+    *data->GetFieldPointer<int32_t>(m) = val;
+    data->SetHasbit(m);
+    return true;
+  }
+
+  // Value handler for repeated enum fields.  A value known to the enum
+  // definition is appended to the repeated field; any other value is added
+  // to the unknown field set as a varint.  Always returns true.
+  static bool AppendEnum(void *_m, void *fval, int32_t val) {
+    // Closure is the enclosing message.  We can't use the RepeatedField<> as
+    // the closure because we need to go back to the message for unrecognized
+    // enum values, which go into the unknown field set.
+    goog::Message* m = static_cast<goog::Message*>(_m);
+    const EnumHandlerData* data = static_cast<const EnumHandlerData*>(fval);
+    if (!data->IsValidValue(val)) {
+      data->mutable_unknown_fields(m)->AddVarint(data->field_number(), val);
+      return true;
+    }
+    goog::RepeatedField<int32_t>* values =
+        data->GetFieldPointer<goog::RepeatedField<int32_t> >(m);
+    values->Add(val);
+    return true;
+  }
+
+  // String ////////////////////////////////////////////////////////////////////
+
+  // Handler data for scalar (non-repeated) string fields: the field's
+  // location plus the address of the default string instance, so handlers
+  // can tell when a field still points at the default.
+  template<class T>
+  class StringHandlerData : public FieldOffset {
+   public:
+    StringHandlerData(const goog::FieldDescriptor* proto2_f,
+                      const goog::internal::GeneratedMessageReflection* r,
+                      const goog::Message& prototype)
+        : FieldOffset(proto2_f, r) {
+      // "prototype" isn't guaranteed to be empty, so we create a copy to get
+      // the default string instance.
+      goog::Message* scratch = prototype.New();
+      default_instance_ = &r->GetStringReference(*scratch, proto2_f, NULL);
+      delete scratch;
+    }
+
+    // The default string instance captured at construction.
+    const T* prototype() const { return default_instance_; }
+
+    // Returns the address of the field's string pointer inside "message".
+    T** GetStringPointer(void *message) const {
+      return GetFieldPointer<T*>(message);
+    }
+
+   private:
+    const T* default_instance_;
+  };
+
+  // Installs the string handlers for field "f", whose C++ string type is T.
+  // OnStringBuf<T> always receives the data.  Sequence fields start each
+  // string with StartRepeatedString<T>; other fields use StartString<T> with
+  // a heap-allocated StringHandlerData<T>.
+  template <typename T> static void SetStringHandlers(
+      const goog::FieldDescriptor* proto2_f,
+      const goog::Message& m,
+      const goog::internal::GeneratedMessageReflection* r,
+      const upb::FieldDef* f,
+      upb::Handlers* h) {
+    h->SetStringHandler(f, &OnStringBuf<T>, NULL, NULL);
+    if (!f->IsSequence()) {
+      StringHandlerData<T>* data = new StringHandlerData<T>(proto2_f, r, m);
+      h->SetStartStringHandler(
+          f, &StartString<T>, data, &upb::DeletePointer<StringHandlerData<T> >);
+      return;
+    }
+    SetStartSequenceHandler(proto2_f, r, f, h);
+    h->SetStartStringHandler(f, &StartRepeatedString<T>, NULL, NULL);
+  }
+
+  // Start-string handler for scalar string fields: sets the hasbit and
+  // returns an empty string object, which becomes the closure for the
+  // string data.
+  // This needs to be templated because google3 string is not std::string.
+  template <typename T> static void* StartString(
+      void *m, void *fval, size_t size_hint) {
+    UPB_UNUSED(size_hint);
+    const StringHandlerData<T>* data =
+        static_cast<const StringHandlerData<T>*>(fval);
+    T** slot = data->GetStringPointer(m);
+    data->SetHasbit(m);
+    // If it points to the default instance, we must create a new instance.
+    if (*slot == data->prototype()) {
+      *slot = new T();
+    }
+    T* str = *slot;
+    str->clear();
+    // reserve() here appears to hurt performance rather than help.
+    return str;
+  }
+
+  // String data handler: appends the "n" bytes at "buf" to the T passed as
+  // the closure and reports all "n" bytes as consumed.  "fval" is unused.
+  template <typename T> static size_t OnStringBuf(
+      void *_str, void *fval, const char *buf, size_t n) {
+    UPB_UNUSED(fval);
+    static_cast<T*>(_str)->append(buf, n);
+    return n;
+  }
+
+
+  // Start-string handler for repeated string fields.  The closure "_r" is
+  // the goog::RepeatedPtrField<T>; the element obtained from Add() is
+  // cleared and returned.
+  template <typename T>
+  static void* StartRepeatedString(void *_r, void *fval, size_t size_hint) {
+    UPB_UNUSED(size_hint);
+    UPB_UNUSED(fval);
+    goog::RepeatedPtrField<T>* strings =
+        static_cast<goog::RepeatedPtrField<T>*>(_r);
+    T* element = strings->Add();
+    element->clear();
+    // reserve() here appears to hurt performance rather than help.
+    return element;
+  }
+
+  // SubMessage ////////////////////////////////////////////////////////////////
+
+  // Handler data for submessage fields: the field's location plus the
+  // prototype message that new submessages are created from.
+  class SubMessageHandlerData : public FieldOffset {
+   public:
+    SubMessageHandlerData(
+        const goog::FieldDescriptor* f,
+        const goog::internal::GeneratedMessageReflection* r,
+        const goog::Message* prototype)
+        : FieldOffset(f, r),
+          submsg_prototype_(prototype) {
+    }
+
+    // The prototype passed at construction.
+    const goog::Message* prototype() const { return submsg_prototype_; }
+
+   private:
+    const goog::Message* const submsg_prototype_;
+  };
+
+  // Installs the start-submessage handler for field "f".  Sequence fields
+  // also get a start-sequence handler and use StartRepeatedSubMessage; other
+  // fields use StartSubMessage.  The handler data is a heap-allocated
+  // SubMessageHandlerData holding the field's prototype.
+  static void SetSubMessageHandlers(
+      const goog::FieldDescriptor* proto2_f,
+      const goog::Message& m,
+      const goog::internal::GeneratedMessageReflection* r,
+      const upb::FieldDef* f,
+      upb::Handlers* h) {
+    const goog::Message* prototype = GetFieldPrototype(m, proto2_f);
+    SubMessageHandlerData* data =
+        new SubMessageHandlerData(proto2_f, r, prototype);
+    upb::Handlers::Free* deleter = &upb::DeletePointer<SubMessageHandlerData>;
+    if (!f->IsSequence()) {
+      h->SetStartSubMessageHandler(f, &StartSubMessage, data, deleter);
+      return;
+    }
+    SetStartSequenceHandler(proto2_f, r, f, h);
+    h->SetStartSubMessageHandler(f, &StartRepeatedSubMessage, data, deleter);
+  }
+
+  // Start-submessage handler for non-repeated fields: sets the hasbit and
+  // returns the submessage stored in "m".  A new submessage is created from
+  // the prototype when the stored pointer is NULL or is the prototype itself.
+  static void* StartSubMessage(void *m, void *fval) {
+    const SubMessageHandlerData* data =
+        static_cast<const SubMessageHandlerData*>(fval);
+    data->SetHasbit(m);
+    goog::Message** slot = data->GetFieldPointer<goog::Message*>(m);
+    const bool needs_new = (*slot == NULL || *slot == data->prototype());
+    if (needs_new) {
+      *slot = data->prototype()->New();
+    }
+    return *slot;
+  }
+
+  // Type handler given to RepeatedPtrFieldBase so that repeated submessages
+  // can be manipulated as untyped (void) elements.
+  class RepeatedMessageTypeHandler {
+   public:
+    typedef void Type;
+    // AddAllocated() calls this, but only if other objects are sitting
+    // around waiting for reuse, which we will not do.
+    static void Delete(Type* t) {
+      UPB_UNUSED(t);
+      assert(false);
+    }
+  };
+
+  // Start-submessage handler for repeated fields.  Reuses a cleared element
+  // when AddFromCleared() provides one; otherwise creates a new submessage
+  // from the prototype and adds it to the field.
+  // Closure is a RepeatedPtrField<SubMessageType>*, but we access it through
+  // its base class RepeatedPtrFieldBase*.
+  static void* StartRepeatedSubMessage(void* _r, void *fval) {
+    const SubMessageHandlerData* data =
+        static_cast<const SubMessageHandlerData*>(fval);
+    goog::internal::RepeatedPtrFieldBase* field =
+        static_cast<goog::internal::RepeatedPtrFieldBase*>(_r);
+    void* submsg = field->AddFromCleared<RepeatedMessageTypeHandler>();
+    if (submsg != NULL) return submsg;
+    submsg = data->prototype()->New();
+    field->AddAllocated<RepeatedMessageTypeHandler>(submsg);
+    return submsg;
+  }
+
+  // TODO(haberman): handle Extensions, Unknown Fields.
+
+#ifdef UPB_GOOGLE3
+  // Handlers for types/features only included in internal proto2 release:
+  // Cord, StringPiece, LazyField, and MessageSet.
+  // TODO(haberman): LazyField, MessageSet.
+
+  // Cord //////////////////////////////////////////////////////////////////////
+
+  // Installs the string handlers for a Cord field.  OnCordBuf always
+  // receives the data.  Sequence fields start each string with
+  // StartRepeatedCord; other fields use StartCord with a heap-allocated
+  // FieldOffset.
+  static void SetCordHandlers(
+      const proto2::FieldDescriptor* proto2_f,
+      const proto2::internal::GeneratedMessageReflection* r,
+      const upb::FieldDef* f, upb::Handlers* h) {
+    h->SetStringHandler(f, &OnCordBuf, NULL, NULL);
+    if (f->IsSequence()) {
+      SetStartSequenceHandler(proto2_f, r, f, h);
+      h->SetStartStringHandler(f, &StartRepeatedCord, NULL, NULL);
+    } else {
+      // The deleter must be instantiated with the pointee type (FieldOffset),
+      // matching SetStartSequenceHandler(), so that the object allocated here
+      // is deleted as a FieldOffset.
+      h->SetStartStringHandler(
+          f, &StartCord, new FieldOffset(proto2_f, r),
+          &upb::DeletePointer<FieldOffset>);
+    }
+  }
+
+  // Start-string handler for non-repeated Cord fields: sets the hasbit,
+  // clears the Cord stored in "m" and returns it as the closure.
+  static void* StartCord(void *m, void *fval, size_t size_hint) {
+    UPB_UNUSED(size_hint);
+    const FieldOffset* field_offset = static_cast<const FieldOffset*>(fval);
+    field_offset->SetHasbit(m);
+    Cord* cord = field_offset->GetFieldPointer<Cord>(m);
+    cord->Clear();
+    return cord;
+  }
+
+  // String data handler for Cord fields: appends the "n" bytes at "buf" to
+  // the Cord passed as the closure and reports all "n" bytes as consumed.
+  static size_t OnCordBuf(void *_c, void *fval, const char *buf, size_t n) {
+    UPB_UNUSED(fval);
+    static_cast<Cord*>(_c)->Append(StringPiece(buf, n));
+    return n;
+  }
+
+  // Start-string handler for repeated Cord fields.  The closure "_r" is the
+  // proto2::RepeatedField<Cord>; returns the element obtained from Add().
+  static void* StartRepeatedCord(void *_r, void *fval, size_t size_hint) {
+    UPB_UNUSED(size_hint);
+    UPB_UNUSED(fval);
+    proto2::RepeatedField<Cord>* cords =
+        static_cast<proto2::RepeatedField<Cord>*>(_r);
+    return cords->Add();
+  }
+
+  // StringPiece ///////////////////////////////////////////////////////////////
+
+  // Installs the string handlers for a StringPiece field.  OnStringPieceBuf
+  // always receives the data.  Sequence fields start each string with
+  // StartRepeatedStringPiece; other fields use StartStringPiece with a
+  // heap-allocated FieldOffset.
+  static void SetStringPieceHandlers(
+      const proto2::FieldDescriptor* proto2_f,
+      const proto2::internal::GeneratedMessageReflection* r,
+      const upb::FieldDef* f, upb::Handlers* h) {
+    h->SetStringHandler(f, &OnStringPieceBuf, NULL, NULL);
+    if (f->IsSequence()) {
+      SetStartSequenceHandler(proto2_f, r, f, h);
+      h->SetStartStringHandler(f, &StartRepeatedStringPiece, NULL, NULL);
+    } else {
+      // The deleter must be instantiated with the pointee type (FieldOffset),
+      // matching SetStartSequenceHandler(), so that the object allocated here
+      // is deleted as a FieldOffset.
+      h->SetStartStringHandler(
+          f, &StartStringPiece, new FieldOffset(proto2_f, r),
+          &upb::DeletePointer<FieldOffset>);
+    }
+  }
+
+  // String data handler for StringPiece fields.  Builds a temporary buffer
+  // holding the field's current contents followed by the "len" new bytes,
+  // copies it into the field, and reports all "len" bytes as consumed.
+  static size_t OnStringPieceBuf(void *_f, void *fval,
+                                 const char *buf, size_t len) {
+    UPB_UNUSED(fval);
+    // TODO(haberman): alias if possible and enabled on the input stream.
+    // TODO(haberman): add a method to StringPieceField that lets us avoid
+    // this copy/malloc/free.
+    proto2::internal::StringPieceField* field =
+        static_cast<proto2::internal::StringPieceField*>(_f);
+    size_t combined_len = field->size() + len;
+    char *combined = new char[combined_len];
+    memcpy(combined, field->data(), field->size());
+    memcpy(combined + field->size(), buf, len);
+    field->CopyFrom(StringPiece(combined, combined_len));
+    delete[] combined;
+    return len;
+  }
+
+  // Start-string handler for non-repeated StringPiece fields: sets the
+  // hasbit, clears the field stored in "m" and returns it as the closure.
+  static void* StartStringPiece(void *m, void *fval, size_t size_hint) {
+    UPB_UNUSED(size_hint);
+    typedef proto2::internal::StringPieceField Field;
+    const FieldOffset* field_offset = static_cast<const FieldOffset*>(fval);
+    field_offset->SetHasbit(m);
+    Field* field = field_offset->GetFieldPointer<Field>(m);
+    field->Clear();
+    return field;
+  }
+
+  // Start-string handler for repeated StringPiece fields.  The closure "_r"
+  // is the RepeatedPtrField of StringPieceField; the element obtained from
+  // Add() is cleared and returned.
+  static void* StartRepeatedStringPiece(void* _r, void *fval,
+                                        size_t size_hint) {
+    UPB_UNUSED(size_hint);
+    UPB_UNUSED(fval);
+    typedef proto2::internal::StringPieceField Field;
+    proto2::RepeatedPtrField<Field>* fields =
+        static_cast<proto2::RepeatedPtrField<Field>*>(_r);
+    Field* element = fields->Add();
+    element->Clear();
+    return element;
+  }
+
+#endif  // UPB_GOOGLE3
+};
+
+namespace upb {
+namespace google {
+
+// Public entry point; forwards to me::FieldAccessor::TrySet().
+bool TrySetWriteHandlers(const goog::FieldDescriptor* proto2_f,
+                         const goog::Message& prototype,
+                         const upb::FieldDef* upb_f, upb::Handlers* h) {
+  const bool handlers_set =
+      me::FieldAccessor::TrySet(proto2_f, prototype, upb_f, h);
+  return handlers_set;
+}
+
+// Public entry point; forwards to me::FieldAccessor::GetFieldPrototype().
+const goog::Message* GetFieldPrototype(
+    const goog::Message& m,
+    const goog::FieldDescriptor* f) {
+  const goog::Message* prototype = me::FieldAccessor::GetFieldPrototype(m, f);
+  return prototype;
+}
+
+}  // namespace google
+}  // namespace upb
--- a/upb/google/proto2.h
+++ b/upb/google/proto2.h
@ -0,0 +1,62 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+//
+// Support for registering field handlers that can write into a proto2
+// message that uses GeneratedMessageReflection (which includes all messages
+// generated by the proto2 compiler as well as DynamicMessage).
+//
+// This is a low-level interface; the high-level interface in google.h is
+// more user-friendly.
+
+#ifndef UPB_GOOGLE_PROTO2_H_
+#define UPB_GOOGLE_PROTO2_H_
+
+namespace proto2 {
+class FieldDescriptor;
+class Message;
+}
+
+namespace google {
+namespace protobuf {
+class FieldDescriptor;
+class Message;
+}
+}
+
+namespace upb {
+class FieldDef;
+class Handlers;
+}
+
+namespace upb {
+namespace google {
+
+// Sets field handlers in the given Handlers object for writing to a single
+// field (as described by "proto2_f" and "upb_f") into a message constructed
+// by the same factory as "prototype."  Returns true if this was successful
+// (this will fail if "prototype" is not a proto2 message, or if we can't
+// handle it for some reason).
+bool TrySetWriteHandlers(const proto2::FieldDescriptor* proto2_f,
+                         const proto2::Message& prototype,
+                         const upb::FieldDef* upb_f, upb::Handlers* h);
+bool TrySetWriteHandlers(const ::google::protobuf::FieldDescriptor* proto2_f,
+                         const ::google::protobuf::Message& prototype,
+                         const upb::FieldDef* upb_f, upb::Handlers* h);
+
+// Returns a prototype for the message type of field "f" in "m", obtained
+// from the MessageFactory that "m" was constructed with.  Returns NULL if
+// "m" does not use GeneratedMessageReflection.
+const proto2::Message* GetFieldPrototype(
+    const proto2::Message& m,
+    const proto2::FieldDescriptor* f);
+const ::google::protobuf::Message* GetFieldPrototype(
+    const ::google::protobuf::Message& m,
+    const ::google::protobuf::FieldDescriptor* f);
+
+}  // namespace google
+}  // namespace upb
+
+#endif  // UPB_GOOGLE_PROTO2_H_
--- a/upb/handlers.c
+++ b/upb/handlers.c
@ -1,292 +1,385 @@
 /*
 * upb - a minimalist implementation of protocol buffers.
 *
- * Copyright (c) 2011 Google Inc.  See LICENSE for details.
+ * Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
 * Author: Josh Haberman <jhaberman@gmail.com>
 */

-#include <stdlib.h>
 #include "upb/handlers.h"

+#include <stdlib.h>
+#include <string.h>
+
+// Defined for the sole purpose of having a unique pointer value for
+// UPB_NO_CLOSURE.
+char _upb_noclosure;
+
+typedef struct {
+  upb_func *handler;  // The handler itself, stored as a generic upb_func*.
+
+  // Could put either or both of these in a separate table to save memory when
+  // they are sparse.
+  void *data;  // Bound handler data; passed to "cleanup" when released.
+  upb_handlerfree *cleanup;  // Frees "data"; may be NULL.
+
+  // TODO(haberman): this is wasteful; only the first "fieldhandler" of a
+  // submessage field needs this.  To reduce memory footprint we should either:
+  // - put the subhandlers in a separate "fieldhandler", stored as part of
+  //   a union with one of the above fields.
+  // - count selector offsets by individual pointers instead of by whole
+  //   fieldhandlers.
+  const upb_handlers *subhandlers;
+} fieldhandler;
+
+// Returns the fieldhandler slot for "selector".  The slots are an array that
+// begins at h->fh_base and is indexed by selector.
+static const fieldhandler *getfh(
+    const upb_handlers *h, upb_selector_t selector) {
+  assert(selector < upb_handlers_msgdef(h)->selector_count);
+  const fieldhandler *table = (const void*)&h->fh_base;
+  return table + selector;
+}

-/* upb_mhandlers **************************************************************/
+// Non-const variant of getfh(), for callers that hold mutable handlers.
+static fieldhandler *getfh_mutable(upb_handlers *h, upb_selector_t selector) {
+  const fieldhandler *slot = getfh(h, selector);
+  return (fieldhandler*)slot;
+}

-static upb_mhandlers *upb_mhandlers_new(void) {
-  upb_mhandlers *m = malloc(sizeof(*m));
-  upb_inttable_init(&m->fieldtab);
-  m->startmsg = NULL;
-  m->endmsg = NULL;
-  m->is_group = false;
-#ifdef UPB_USE_JIT_X64
-  m->tablearray = NULL;
-#endif
-  return m;
+// Reports whether "h" is frozen, as tracked by its upb_refcounted base.
+bool upb_handlers_isfrozen(const upb_handlers *h) {
+  const upb_refcounted *base = upb_upcast(h);
+  return upb_refcounted_isfrozen(base);
 }

-static upb_fhandlers *_upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
-                                                  upb_fieldtype_t type,
-                                                  bool repeated) {
-  const upb_value *v = upb_inttable_lookup(&m->fieldtab, n);
-  // TODO: design/refine the API for changing the set of fields or modifying
-  // existing handlers.
-  if (v) return NULL;
-  upb_fhandlers new_f = {type, repeated, 0,
-      n, -1, m, NULL, UPB_NO_VALUE, NULL, NULL, NULL, NULL, NULL,
-#ifdef UPB_USE_JIT_X64
-      0, 0, 0,
-#endif
-  };
-  upb_fhandlers *ptr = malloc(sizeof(*ptr));
-  memcpy(ptr, &new_f, sizeof(upb_fhandlers));
-  upb_inttable_insert(&m->fieldtab, n, upb_value_ptr(ptr));
-  return ptr;
+// Returns 2 for sequence fields and 0 otherwise.  upb_getselector() places
+// the STARTSEQ/ENDSEQ selectors at selector_base - 2 and selector_base - 1.
+uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
+  if (upb_fielddef_isseq(f)) return 2;
+  return 0;
 }

-upb_fhandlers *upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
-                                          upb_fieldtype_t type, bool repeated) {
-  assert(type != UPB_TYPE(MESSAGE));
-  assert(type != UPB_TYPE(GROUP));
-  return _upb_mhandlers_newfhandlers(m, n, type, repeated);
+// Returns how many selectors field "f" occupies: one, plus two for each of
+// the string, sequence and submessage properties that apply to it.
+uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
+  uint32_t count = 1;
+  count += upb_fielddef_isstring(f) ? 2 : 0;  // STARTSTR/ENDSTR
+  count += upb_fielddef_isseq(f) ? 2 : 0;  // STARTSEQ/ENDSEQ
+  count += upb_fielddef_issubmsg(f) ? 2 : 0;  // STARTSUBMSG/ENDSUBMSG
+  return count;
 }

-upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n,
-                                               upb_fieldtype_t type,
-                                               bool repeated,
-                                               upb_mhandlers *subm) {
-  assert(type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP));
-  assert(subm);
-  upb_fhandlers *f = _upb_mhandlers_newfhandlers(m, n, type, repeated);
-  if (!f) return NULL;
-  f->submsg = subm;
-  if (type == UPB_TYPE(GROUP))
-    _upb_mhandlers_newfhandlers(subm, n, UPB_TYPE_ENDGROUP, false);
-  return f;
+// Maps the type of primitive field "f" to the handler type that carries its
+// values.  Any other field type is invalid input: it asserts and returns -1.
+upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) {
+  switch (upb_fielddef_type(f)) {
+    // Delivered as int32 (enums included).
+    case UPB_TYPE_ENUM:
+    case UPB_TYPE_INT32:
+    case UPB_TYPE_SINT32:
+    case UPB_TYPE_SFIXED32:
+      return UPB_HANDLER_INT32;
+    // Delivered as int64.
+    case UPB_TYPE_INT64:
+    case UPB_TYPE_SINT64:
+    case UPB_TYPE_SFIXED64:
+      return UPB_HANDLER_INT64;
+    // Delivered as uint32.
+    case UPB_TYPE_UINT32:
+    case UPB_TYPE_FIXED32:
+      return UPB_HANDLER_UINT32;
+    // Delivered as uint64.
+    case UPB_TYPE_UINT64:
+    case UPB_TYPE_FIXED64:
+      return UPB_HANDLER_UINT64;
+    case UPB_TYPE_FLOAT:
+      return UPB_HANDLER_FLOAT;
+    case UPB_TYPE_DOUBLE:
+      return UPB_HANDLER_DOUBLE;
+    case UPB_TYPE_BOOL:
+      return UPB_HANDLER_BOOL;
+    default:
+      assert(false);
+      return -1;  // Invalid input.
+  }
 }

-upb_fhandlers *upb_mhandlers_lookup(const upb_mhandlers *m, uint32_t n) {
-  const upb_value *v = upb_inttable_lookup(&m->fieldtab, n);
-  return v ? upb_value_getptr(*v) : NULL;
+// Computes the selector for handler "type" on field "f" and stores it in
+// "*s".  Returns false, leaving "*s" untouched, when "type" does not apply
+// to "f" or is not a known handler type.
+bool upb_getselector(
+    const upb_fielddef *f, upb_handlertype_t type, upb_selector_t *s) {
+  // If the type checks in this function are a hot-spot, we can introduce a
+  // separate function that calculates the selector assuming that the type
+  // is correct (may even want to make it inline for the upb_sink fast-path).
+  switch (type) {
+    case UPB_HANDLER_INT32:
+    case UPB_HANDLER_INT64:
+    case UPB_HANDLER_UINT32:
+    case UPB_HANDLER_UINT64:
+    case UPB_HANDLER_FLOAT:
+    case UPB_HANDLER_DOUBLE:
+    case UPB_HANDLER_BOOL:
+      if (!upb_fielddef_isprimitive(f) ||
+          upb_handlers_getprimitivehandlertype(f) != type)
+        return false;
+      *s = f->selector_base;
+      break;
+    case UPB_HANDLER_STARTSTR:
+      if (!upb_fielddef_isstring(f)) return false;
+      *s = f->selector_base;
+      break;
+    case UPB_HANDLER_STRING:
+      if (!upb_fielddef_isstring(f)) return false;
+      *s = f->selector_base + 1;
+      break;
+    case UPB_HANDLER_ENDSTR:
+      if (!upb_fielddef_isstring(f)) return false;
+      *s = f->selector_base + 2;
+      break;
+    case UPB_HANDLER_STARTSEQ:
+      if (!upb_fielddef_isseq(f)) return false;
+      *s = f->selector_base - 2;
+      break;
+    case UPB_HANDLER_ENDSEQ:
+      if (!upb_fielddef_isseq(f)) return false;
+      *s = f->selector_base - 1;
+      break;
+    case UPB_HANDLER_STARTSUBMSG:
+      if (!upb_fielddef_issubmsg(f)) return false;
+      *s = f->selector_base + 1;
+      break;
+    case UPB_HANDLER_ENDSUBMSG:
+      if (!upb_fielddef_issubmsg(f)) return false;
+      *s = f->selector_base + 2;
+      break;
+    default:
+      // Not a known handler type: fail instead of reaching the assert below
+      // with *s never written.
+      return false;
+  }
+  assert(*s < upb_fielddef_msgdef(f)->selector_count);
+  return true;
 }

+// Forwards to upb_refcounted_ref() on the refcounted base of "h".
+void upb_handlers_ref(const upb_handlers *h, const void *owner) {
+  const upb_refcounted *base = upb_upcast(h);
+  upb_refcounted_ref(base, owner);
+}

-/* upb_handlers ***************************************************************/
+// Forwards to upb_refcounted_unref() on the refcounted base of "h".
+void upb_handlers_unref(const upb_handlers *h, const void *owner) {
+  const upb_refcounted *base = upb_upcast(h);
+  upb_refcounted_unref(base, owner);
+}

-upb_handlers *upb_handlers_new() {
-  upb_handlers *h = malloc(sizeof(*h));
-  h->refcount = 1;
-  h->msgs_len = 0;
-  h->msgs_size = 4;
-  h->msgs = malloc(h->msgs_size * sizeof(*h->msgs));
-  h->should_jit = true;
-  return h;
+// Forwards to upb_refcounted_donateref() on the refcounted base of "h".
+void upb_handlers_donateref(
+    const upb_handlers *h, const void *from, const void *to) {
+  const upb_refcounted *base = upb_upcast(h);
+  upb_refcounted_donateref(base, from, to);
 }

-void upb_handlers_ref(upb_handlers *h) { h->refcount++; }
-
-void upb_handlers_unref(upb_handlers *h) {
-  if (--h->refcount == 0) {
-    for (int i = 0; i < h->msgs_len; i++) {
-      upb_mhandlers *mh = h->msgs[i];
-      upb_inttable_iter j;
-      upb_inttable_begin(&j, &mh->fieldtab);
-      for(; !upb_inttable_done(&j); upb_inttable_next(&j)) {
-        free(upb_value_getptr(upb_inttable_iter_value(&j)));
-      }
-      upb_inttable_uninit(&mh->fieldtab);
-#ifdef UPB_USE_JIT_X64
-      free(mh->tablearray);
-#endif
-      free(mh);
-    }
-    free(h->msgs);
-    free(h);
-  }
+// Forwards to upb_refcounted_checkref() on the refcounted base of "h".
+void upb_handlers_checkref(const upb_handlers *h, const void *owner) {
+  const upb_refcounted *base = upb_upcast(h);
+  upb_refcounted_checkref(base, owner);
+}
+
+// Releases the data bound to the ("f", "type") handler slot of "h": runs the
+// slot's cleanup function on its data if one is set, then clears both.
+// Does nothing when "type" does not apply to "f".
+static void do_cleanup(upb_handlers* h, const upb_fielddef *f,
+                       upb_handlertype_t type) {
+  upb_selector_t sel;
+  if (upb_getselector(f, type, &sel)) {
+    fieldhandler *slot = getfh_mutable(h, sel);
+    if (slot->cleanup) slot->cleanup(slot->data);
+    slot->cleanup = NULL;
+    slot->data = NULL;
+  }
 }

-upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h) {
-  if (h->msgs_len == h->msgs_size) {
-    h->msgs_size *= 2;
-    h->msgs = realloc(h->msgs, h->msgs_size * sizeof(*h->msgs));
+static void freehandlers(upb_refcounted *r) {  // "free" entry of the vtbl.
+  upb_handlers *h = (upb_handlers*)r;
+  upb_msg_iter i;
+  for(upb_msg_begin(&i, h->msg); !upb_msg_done(&i); upb_msg_next(&i)) {
+    upb_fielddef *f = upb_msg_iter_field(&i);
+    for (upb_handlertype_t type = 0; type < UPB_HANDLER_MAX; type++)
+      do_cleanup(h, f, type);  // No-op for types that don't apply to f.
  }
-  upb_mhandlers *mh = upb_mhandlers_new();
-  h->msgs[h->msgs_len++] = mh;
-  return mh;
+  upb_msgdef_unref(h->msg, h);  // Drops the ref taken in upb_handlers_new().
+  free(h);
 }

-static upb_mhandlers *upb_regmsg_dfs(upb_handlers *h, const upb_msgdef *m,
-                                     upb_onmsgreg *msgreg_cb,
-                                     upb_onfieldreg *fieldreg_cb,
-                                     void *closure, upb_strtable *mtab) {
-  upb_mhandlers *mh = upb_handlers_newmhandlers(h);
-  upb_strtable_insert(mtab, upb_def_fullname(UPB_UPCAST(m)), upb_value_ptr(mh));
-  if (msgreg_cb) msgreg_cb(closure, mh, m);
+static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit,
+                          void *closure) {  // "visit" entry of the vtbl.
+  const upb_handlers *h = (const upb_handlers*)r;
  upb_msg_iter i;
-  for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
+  for(upb_msg_begin(&i, h->msg); !upb_msg_done(&i); upb_msg_next(&i)) {
    upb_fielddef *f = upb_msg_iter_field(&i);
-    upb_fhandlers *fh;
-    if (upb_issubmsg(f)) {
-      upb_mhandlers *sub_mh;
-      const upb_value *subm_ent;
-      // The table lookup is necessary to break the DFS for type cycles.
-      const char *subname = upb_def_fullname(upb_fielddef_subdef(f));
-      if ((subm_ent = upb_strtable_lookup(mtab, subname)) != NULL) {
-        sub_mh = upb_value_getptr(*subm_ent);
-      } else {
-        sub_mh = upb_regmsg_dfs(
-            h, upb_downcast_msgdef_const(upb_fielddef_subdef(f)),
-            msgreg_cb, fieldreg_cb, closure, mtab);
-      }
-      fh = upb_mhandlers_newfhandlers_subm(
-          mh, f->number, f->type, upb_isseq(f), sub_mh);
-    } else {
-      fh = upb_mhandlers_newfhandlers(mh, f->number, f->type, upb_isseq(f));
-    }
-    if (fieldreg_cb) fieldreg_cb(closure, fh, f);
+    if (!upb_fielddef_issubmsg(f)) continue;  // Only submsg fields have subhandlers.
+    const upb_handlers *sub = upb_handlers_getsubhandlers(h, f);
+    if (sub) visit(r, upb_upcast(sub), closure);  // Report the r -> sub edge.
  }
-  return mh;
 }

-upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, const upb_msgdef *m,
-                                      upb_onmsgreg *msgreg_cb,
-                                      upb_onfieldreg *fieldreg_cb,
-                                      void *closure) {
-  upb_strtable mtab;
-  upb_strtable_init(&mtab);
-  upb_mhandlers *ret =
-      upb_regmsg_dfs(h, m, msgreg_cb, fieldreg_cb, closure, &mtab);
-  upb_strtable_uninit(&mtab);
-  return ret;
+// Creates a handlers object for the frozen msgdef "md", with a reference
+// held by "owner".  The object is allocated together with one fieldhandler
+// slot per selector of "md".  Returns NULL if allocation or refcount
+// initialization fails.
+upb_handlers *upb_handlers_new(const upb_msgdef *md, const void *owner) {
+  assert(upb_msgdef_isfrozen(md));
+  static const struct upb_refcounted_vtbl vtbl = {visithandlers, freehandlers};
+  size_t slots_size = sizeof(fieldhandler) * md->selector_count;
+  upb_handlers *h = calloc(sizeof(*h) - sizeof(void*) + slots_size, 1);
+  if (!h) return NULL;
+  h->msg = md;
+  upb_msgdef_ref(h->msg, h);
+  if (!upb_refcounted_init(upb_upcast(h), &vtbl, owner)) {
+    // Out of memory: undo the msgdef ref and release the allocation.
+    freehandlers(upb_upcast(h));
+    return NULL;
+  }
+
+  // calloc() above initialized all handlers to NULL.
+  return h;
 }

+// Freezes the "n" handlers objects in "handlers" by forwarding them, viewed
+// as upb_refcounted objects, to upb_refcounted_freeze().
+bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) {
+  // TODO: verify we have a transitive closure.
+  upb_refcounted *const *objs = (upb_refcounted*const*)handlers;
+  return upb_refcounted_freeze(objs, n, s);
+}
+
+// Returns the msgdef that "h" was created for.
+const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) {
+  return h->msg;
+}

-/* upb_dispatcher *************************************************************/
-
-void upb_dispatcher_init(upb_dispatcher *d, upb_status *status,
-                         upb_exit_handler UPB_NORETURN *exit,
-                         void *srcclosure) {
-  d->stack[0].f = NULL;  // Should never be read.
-  d->limit = &d->stack[UPB_MAX_NESTING];
-  d->exitjmp = exit;
-  d->srcclosure = srcclosure;
-  d->top_is_implicit = false;
-  d->msgent = NULL;
-  d->top = NULL;
-  d->toplevel_msgent = NULL;
-  d->status = status;
+void upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handler *handler) {
+  assert(!upb_handlers_isfrozen(h));  // Handlers may only change before freezing.
+  h->startmsg = handler;  // Replaces any previously set start-message handler.
 }

-upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *closure,
-                                           upb_mhandlers *top) {
-  d->msgent = top;
-  d->toplevel_msgent = top;
-  d->top = d->stack;
-  d->top->closure = closure;
-  d->top->is_sequence = false;
-  d->top->is_packed = false;
-  return d->top;
+upb_startmsg_handler *upb_handlers_getstartmsg(const upb_handlers *h) {
+  return h->startmsg;  // Whatever upb_handlers_setstartmsg() last stored.
 }

-void upb_dispatcher_uninit(upb_dispatcher *d) {
-  (void)d;
+void upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handler *handler) {
+  assert(!upb_handlers_isfrozen(h));  // Handlers may only change before freezing.
+  h->endmsg = handler;  // Replaces any previously set end-message handler.
 }

-void upb_dispatch_startmsg(upb_dispatcher *d) {
-  upb_flow_t flow = UPB_CONTINUE;
-  if (d->msgent->startmsg) d->msgent->startmsg(d->top->closure);
-  if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d);
+upb_endmsg_handler *upb_handlers_getendmsg(const upb_handlers *h) {
+  return h->endmsg;  // Whatever upb_handlers_setendmsg() last stored.
 }

-void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status) {
-  assert(d->top == d->stack);
-  if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, d->status);
-  // TODO: should we avoid this copy by passing client's status obj to cbs?
-  upb_status_copy(status, d->status);
+// Returns the address of the subhandlers pointer for submessage field "f".
+// For now we stuff the subhandlers pointer into the fieldhandlers*
+// corresponding to the UPB_HANDLER_STARTSUBMSG handler.
+static const upb_handlers **subhandlersptr(upb_handlers *h,
+                                           const upb_fielddef *f) {
+  assert(upb_fielddef_issubmsg(f));
+  upb_selector_t sel;
+  bool found = upb_getselector(f, UPB_HANDLER_STARTSUBMSG, &sel);
+  UPB_ASSERT_VAR(found, found);
+  fieldhandler *slot = getfh_mutable(h, sel);
+  return &slot->subhandlers;
 }

-upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
-                                            upb_fhandlers *f) {
-  if (d->top + 1 >= d->limit) {
-    upb_status_seterrliteral(d->status, "Nesting too deep.");
-    _upb_dispatcher_abortjmp(d);
+// Sets (or, with sub == NULL, clears) the handlers used for the submessage
+// in field "f".  Returns false if "f" is not a submessage field or if "sub"
+// was created for a msgdef other than the field's subdef.  "h" holds a
+// reference on the stored subhandlers.
+bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f,
+                                 const upb_handlers *sub) {
+  assert(!upb_handlers_isfrozen(h));
+  if (!upb_fielddef_issubmsg(f)) return false;
+  if (sub != NULL &&
+      upb_upcast(upb_handlers_msgdef(sub)) != upb_fielddef_subdef(f)) {
+    return false;
  }
+  const upb_handlers **stored = subhandlersptr(h, f);
+  const upb_handlers *old = *stored;
+  // Take the new reference before dropping the old one, so that re-setting
+  // the same subhandlers can never release them in between.
+  if (sub) upb_ref2(sub, h);
+  *stored = sub;
+  if (old) upb_unref2(old, h);
+  return true;
+}

-  upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure);
-  if (f->startseq) sflow = f->startseq(d->top->closure, f->fval);
-  _upb_dispatcher_sethas(d->top->closure, f->hasbit);
-  if (sflow.flow != UPB_CONTINUE) {
-    _upb_dispatcher_abortjmp(d);
-  }
+// Returns the sub-handlers stored for submessage field "f" of "h" (whatever
+// was last stored in the slot).  "f" must be a submessage field; this is
+// asserted inside subhandlersptr().
+const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h,
+                                                const upb_fielddef *f) {
+  // The const is cast away only to reuse subhandlersptr(); the slot is read,
+  // never written, here.
+  const upb_handlers **stored = subhandlersptr((upb_handlers*)h, f);
+  return *stored;
+}

-  ++d->top;
-  d->top->f = f;
-  d->top->is_sequence = true;
-  d->top->is_packed = false;
-  d->top->closure = sflow.closure;
-  return d->top;
+// Defines upb_handlers_set<name>(): registers "val" as the handler of kind
+// "handlertype" for field "f" of "h", together with the user "data" pointer
+// and the "cleanup" function associated with it.  "h" must not be frozen
+// (asserted).
+//
+// The generated function returns false, changing nothing, if "f" does not
+// belong to the msgdef "h" was created for, or if no selector exists for this
+// field/handler-type combination.  Otherwise do_cleanup() is called for the
+// slot before it is overwritten (presumably to release the data registered
+// previously -- see do_cleanup) and true is returned.
+//
+// "data" is an object pointer and is stored as such: it is handed back as a
+// void* by upb_handlers_gethandlerdata(), so it must not be routed through the
+// function-pointer type upb_func*.
+#define SETTER(name, handlerctype, handlertype) \
+  bool upb_handlers_set ## name(upb_handlers *h, const upb_fielddef *f, \
+                                handlerctype val, void *data, \
+                                upb_handlerfree *cleanup) { \
+    assert(!upb_handlers_isfrozen(h)); \
+    if (upb_handlers_msgdef(h) != upb_fielddef_msgdef(f)) return false; \
+    upb_selector_t selector; \
+    bool ok = upb_getselector(f, handlertype, &selector); \
+    if (!ok) return false; \
+    do_cleanup(h, f, handlertype); \
+    fieldhandler *fh = getfh_mutable(h, selector); \
+    fh->handler = (upb_func*)val; \
+    fh->data = data; \
+    fh->cleanup = (upb_func*)cleanup; \
+    return true; \
+  } \
+
+// Each expansion is a complete function definition, so no trailing ';' is
+// written (a stray ';' at file scope is not valid ISO C).
+SETTER(int32,       upb_int32_handler*,       UPB_HANDLER_INT32)
+SETTER(int64,       upb_int64_handler*,       UPB_HANDLER_INT64)
+SETTER(uint32,      upb_uint32_handler*,      UPB_HANDLER_UINT32)
+SETTER(uint64,      upb_uint64_handler*,      UPB_HANDLER_UINT64)
+SETTER(float,       upb_float_handler*,       UPB_HANDLER_FLOAT)
+SETTER(double,      upb_double_handler*,      UPB_HANDLER_DOUBLE)
+SETTER(bool,        upb_bool_handler*,        UPB_HANDLER_BOOL)
+SETTER(startstr,    upb_startstr_handler*,    UPB_HANDLER_STARTSTR)
+SETTER(string,      upb_string_handler*,      UPB_HANDLER_STRING)
+SETTER(endstr,      upb_endfield_handler*,    UPB_HANDLER_ENDSTR)
+SETTER(startseq,    upb_startfield_handler*,  UPB_HANDLER_STARTSEQ)
+SETTER(startsubmsg, upb_startfield_handler*,  UPB_HANDLER_STARTSUBMSG)
+SETTER(endsubmsg,   upb_endfield_handler*,    UPB_HANDLER_ENDSUBMSG)
+SETTER(endseq,      upb_endfield_handler*,    UPB_HANDLER_ENDSEQ)
+#undef SETTER
+
+// Returns the handler function stored in "h" for selector "s", as the generic
+// function-pointer type upb_func*.
+upb_func *upb_handlers_gethandler(const upb_handlers *h, upb_selector_t s) {
+  return getfh(h, s)->handler;
 }

-upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) {
-  assert(d->top > d->stack);
-  assert(d->top->is_sequence);
-  upb_fhandlers *f = d->top->f;
-  --d->top;
-  upb_flow_t flow = UPB_CONTINUE;
-  if (f->endseq) flow = f->endseq(d->top->closure, f->fval);
-  if (flow != UPB_CONTINUE) {
-    _upb_dispatcher_abortjmp(d);
-  }
-  d->msgent = d->top->f ? d->top->f->submsg : d->toplevel_msgent;
-  return d->top;
+// Returns the user data pointer stored in "h" for selector "s".
+void *upb_handlers_gethandlerdata(const upb_handlers *h, upb_selector_t s) {
+  return getfh(h, s)->data;
 }

-upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
-                                               upb_fhandlers *f) {
-  if (d->top + 1 >= d->limit) {
-    upb_status_seterrliteral(d->status, "Nesting too deep.");
-    _upb_dispatcher_abortjmp(d);
-  }
+// State shared by the recursive newformsg() calls made on behalf of
+// upb_handlers_newfrozen().
+typedef struct {
+  upb_inttable tab;  // maps upb_msgdef* -> upb_handlers*.
+  upb_handlers_callback *callback;  // Called once per handlers created.
+  void *closure;                    // Passed as first argument to "callback".
+} dfs_state;

-  upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure);
-  if (f->startsubmsg) sflow = f->startsubmsg(d->top->closure, f->fval);
-  _upb_dispatcher_sethas(d->top->closure, f->hasbit);
-  if (sflow.flow != UPB_CONTINUE) {
-    _upb_dispatcher_abortjmp(d);
-  }
+// Creates a handlers object for "m" (referenced by "owner"), records it in
+// s->tab, invokes the user callback on it, and then attaches sub-handlers for
+// every submessage field of "m", creating them recursively when the table has
+// no entry for the field's message type yet.
+//
+// Returns the new handlers, or NULL if creating them or inserting into the
+// table failed; on the "oom" path owner's reference on "h" is dropped first.
+static upb_handlers *newformsg(const upb_msgdef *m, const void *owner,
+                               dfs_state *s) {
+  upb_handlers *h = upb_handlers_new(m, owner);
+  if (!h) return NULL;
+  // Register "h" before recursing so that a message type reached again further
+  // down is found by the table lookup below instead of being created again.
+  if (!upb_inttable_insertptr(&s->tab, m, upb_value_ptr(h))) goto oom;

-  ++d->top;
-  d->top->f = f;
-  d->top->is_sequence = false;
-  d->top->is_packed = false;
-  d->top->closure = sflow.closure;
-  d->msgent = f->submsg;
-  upb_dispatch_startmsg(d);
-  return d->top;
-}
+  s->callback(s->closure, h);

-upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d) {
-  assert(d->top > d->stack);
-  assert(!d->top->is_sequence);
-  upb_fhandlers *f = d->top->f;
-  if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, d->status);
-  d->msgent = d->top->f->msg;
-  --d->top;
-  upb_flow_t flow = UPB_CONTINUE;
-  if (f->endsubmsg) f->endsubmsg(d->top->closure, f->fval);
-  if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d);
-  return d->top;
-}
+  // For each submessage field, get or create a handlers object and set it as
+  // the subhandlers.
+  upb_msg_iter i;
+  for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
+    upb_fielddef *f = upb_msg_iter_field(&i);
+    if (!upb_fielddef_issubmsg(f)) continue;

-bool upb_dispatcher_stackempty(upb_dispatcher *d) {
-  return d->top == d->stack;
-}
-bool upb_dispatcher_islegalend(upb_dispatcher *d) {
-  if (d->top == d->stack) return true;
-  if (d->top - 1 == d->stack &&
-      d->top->is_sequence && !d->top->is_packed) return true;
-  return false;
+    const upb_msgdef *subdef = upb_downcast_msgdef(upb_fielddef_subdef(f));
+    const upb_value *subm_ent = upb_inttable_lookupptr(&s->tab, subdef);
+    if (subm_ent) {
+      upb_handlers_setsubhandlers(h, f, upb_value_getptr(*subm_ent));
+    } else {
+      // The address of the local serves as a temporary owner; that reference
+      // is released again once "h" has been given the sub-handlers.
+      upb_handlers *sub_mh = newformsg(subdef, &sub_mh, s);
+      if (!sub_mh) goto oom;
+      upb_handlers_setsubhandlers(h, f, sub_mh);
+      upb_handlers_unref(sub_mh, &sub_mh);
+    }
+  }
+  return h;
+
+oom:
+  upb_handlers_unref(h, owner);
+  return NULL;
 }

-void _upb_dispatcher_abortjmp(upb_dispatcher *d) {
-  d->exitjmp(d->srcclosure);
-  assert(false);  // Never returns.
+// Builds handlers for "m" and, through newformsg(), for the message types of
+// its submessage fields, calling "callback(closure, handlers)" once for each
+// handlers object created so the caller can register its handlers.  The
+// resulting graph is then frozen and the handlers for "m" are returned,
+// referenced by "owner".
+//
+// Returns NULL if the temporary msgdef -> handlers table cannot be initialized
+// or if building the handlers fails.
+const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m,
+                                           const void *owner,
+                                           upb_handlers_callback *callback,
+                                           void *closure) {
+  dfs_state state;
+  state.callback = callback;
+  state.closure = closure;
+  if (!upb_inttable_init(&state.tab, UPB_CTYPE_PTR)) return NULL;
+
+  upb_handlers *ret = newformsg(m, owner, &state);
+  // The table is only consulted while the graph is being built, so release it
+  // here, before the early return; otherwise the failure path leaks it.
+  upb_inttable_uninit(&state.tab);
+  if (!ret) return NULL;
+
+  upb_refcounted *r = upb_upcast(ret);
+  upb_status status = UPB_STATUS_INIT;
+  bool ok = upb_refcounted_freeze(&r, 1, &status);
+  UPB_ASSERT_VAR(ok, ok);
+  upb_status_uninit(&status);
+
+  return ret;
 }
+
+// Defines upb_stdmsg_set<type>(): stores "val" at byte offset f->offset inside
+// the message "_m", where "fval" points to the field's upb_stdmsg_fval.  If
+// f->hasbit is greater than zero, bit (hasbit % 8) of byte (hasbit / 8) of the
+// message is set as well.  "_m" must not be NULL (asserted).  Always returns
+// true.
+// NOTE(review): a hasbit of 0 is skipped, so presumably non-positive values
+// mean "no hasbit" -- confirm against upb_stdmsg_fval.
+#define STDMSG_WRITER(type, ctype)                                            \
+  bool upb_stdmsg_set ## type (void *_m, void *fval, ctype val) {             \
+    assert(_m != NULL);                                                       \
+    const upb_stdmsg_fval *f = fval;                                          \
+    uint8_t *m = _m;                                                          \
+    if (f->hasbit > 0)                                                        \
+      *(uint8_t*)&m[f->hasbit / 8] |= 1 << (f->hasbit % 8);                   \
+    *(ctype*)&m[f->offset] = val;                                             \
+    return true;                                                              \
+  }                                                                           \
+
+STDMSG_WRITER(double, double)
+STDMSG_WRITER(float, float)
+STDMSG_WRITER(int32, int32_t)
+STDMSG_WRITER(int64, int64_t)
+STDMSG_WRITER(uint32, uint32_t)
+STDMSG_WRITER(uint64, uint64_t)
+STDMSG_WRITER(bool, bool)
+#undef STDMSG_WRITER
--- a/upb/handlers.h
+++ b/upb/handlers.h
--- a/upb/msg.c
+++ b/upb/msg.c
@ -1,52 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2010 Google Inc.  See LICENSE for details.
- * Author: Josh Haberman <jhaberman@gmail.com>
- *
- */
-
-#include "upb/upb.h"
-#include "upb/msg.h"
-
-#define UPB_ACCESSOR(type, ctype)                                             \
-  upb_flow_t upb_stdmsg_set ## type (void *_m, upb_value fval,                \
-                                     upb_value val) {                         \
-    assert(_m != NULL);                                                       \
-    const upb_fielddef *f = upb_value_getfielddef(fval);                      \
-    uint8_t *m = _m;                                                          \
-    /* Hasbit is set automatically by the handlers. */                        \
-    *(ctype*)&m[f->offset] = upb_value_get ## type(val);                      \
-    return UPB_CONTINUE;                                                      \
-  }                                                                           \
-
-UPB_ACCESSOR(double, double)
-UPB_ACCESSOR(float, float)
-UPB_ACCESSOR(int32, int32_t)
-UPB_ACCESSOR(int64, int64_t)
-UPB_ACCESSOR(uint32, uint32_t)
-UPB_ACCESSOR(uint64, uint64_t)
-UPB_ACCESSOR(bool, bool)
-UPB_ACCESSOR(ptr, void*)
-#undef UPB_ACCESSORS
-
-static void upb_accessors_onfreg(void *c, upb_fhandlers *fh,
-                                 const upb_fielddef *f) {
-  (void)c;
-  if (f->accessor) {
-    upb_fhandlers_setfval(fh, f->fval);
-    if (upb_isseq(f)) {
-      upb_fhandlers_setstartseq(fh, f->accessor->startseq);
-      upb_fhandlers_setvalue(fh, f->accessor->append);
-      upb_fhandlers_setstartsubmsg(fh, f->accessor->appendsubmsg);
-    } else {
-      upb_fhandlers_setvalue(fh, f->accessor->set);
-      upb_fhandlers_setstartsubmsg(fh, f->accessor->startsubmsg);
-      upb_fhandlers_sethasbit(fh, f->hasbit);
-    }
-  }
-}
-
-upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, const upb_msgdef *m) {
-  return upb_handlers_regmsgdef(h, m, NULL, &upb_accessors_onfreg, NULL);
-}
--- a/upb/msg.h
+++ b/upb/msg.h
@ -1,153 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2010-2011 Google Inc.  See LICENSE for details.
- * Author: Josh Haberman <jhaberman@gmail.com>
- *
- * Routines for reading and writing message data to an in-memory structure,
- * similar to a C struct.
- *
- * upb does not define one single message object that everyone must use.
- * Rather it defines an abstract interface for reading and writing members
- * of a message object, and all of the parsers and serializers use this
- * abstract interface.  This allows upb's parsers and serializers to be used
- * regardless of what memory management scheme or synchronization model the
- * application is using.
- *
- * A standard set of accessors is provided for doing simple reads and writes at
- * a known offset into the message.  These accessors should be used when
- * possible, because they are specially optimized -- for example, the JIT can
- * recognize them and emit specialized code instead of having to call the
- * function at all.  The application can substitute its own accessors when the
- * standard accessors are not suitable.
- */
-
-#ifndef UPB_MSG_H
-#define UPB_MSG_H
-
-#include <stdlib.h>
-#include "upb/def.h"
-#include "upb/handlers.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/* upb_accessor ***************************************************************/
-
-// A upb_accessor is a table of function pointers for doing reads and writes
-// for one specific upb_fielddef.  Each field has a separate accessor, which
-// lives in the fielddef.
-
-typedef bool upb_has_reader(const void *m, upb_value fval);
-typedef upb_value upb_value_reader(const void *m, upb_value fval);
-
-typedef const void *upb_seqbegin_handler(const void *s);
-typedef const void *upb_seqnext_handler(const void *s, const void *iter);
-typedef upb_value upb_seqget_handler(const void *iter);
-INLINE bool upb_seq_done(const void *iter) { return iter == NULL; }
-
-typedef struct _upb_accessor_vtbl {
-  // Writers.  These take an fval as a parameter because the callbacks are used
-  // as upb_handlers, but the fval is always the fielddef for that field.
-  upb_startfield_handler *startsubmsg;     // Non-repeated submsg fields.
-  upb_value_handler      *set;             // Non-repeated scalar fields.
-  upb_startfield_handler *startseq;        // Repeated fields only.
-  upb_startfield_handler *appendsubmsg;    // Repeated submsg fields.
-  upb_value_handler      *append;          // Repeated scalar fields.
-
-  // TODO: expect to also need endsubmsg and endseq.
-
-  // Readers.
-  upb_has_reader         *has;
-  upb_value_reader       *getseq;
-  upb_value_reader       *get;
-  upb_seqbegin_handler   *seqbegin;
-  upb_seqnext_handler    *seqnext;
-  upb_seqget_handler     *seqget;
-} upb_accessor_vtbl;
-
-// Registers handlers for writing into a message of the given type using
-// whatever accessors it has defined.
-upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, const upb_msgdef *m);
-
-INLINE void upb_msg_clearbit(void *msg, const upb_fielddef *f) {
-  ((char*)msg)[f->hasbit / 8] &= ~(1 << (f->hasbit % 8));
-}
-
-/* upb_msg/upb_seq ************************************************************/
-
-// These accessor functions are simply convenience methods for reading or
-// writing to a message through its accessors.
-
-INLINE bool upb_msg_has(const void *m, const upb_fielddef *f) {
-  return f->accessor && f->accessor->has(m, f->fval);
-}
-
-// May only be called for fields that have accessors.
-INLINE upb_value upb_msg_get(const void *m, const upb_fielddef *f) {
-  assert(f->accessor && !upb_isseq(f));
-  return f->accessor->get(m, f->fval);
-}
-
-// May only be called for fields that have accessors.
-INLINE upb_value upb_msg_getseq(const void *m, const upb_fielddef *f) {
-  assert(f->accessor && upb_isseq(f));
-  return f->accessor->getseq(m, f->fval);
-}
-
-INLINE void upb_msg_set(void *m, const upb_fielddef *f, upb_value val) {
-  assert(f->accessor);
-  f->accessor->set(m, f->fval, val);
-}
-
-INLINE const void *upb_seq_begin(const void *s, const upb_fielddef *f) {
-  assert(f->accessor);
-  return f->accessor->seqbegin(s);
-}
-INLINE const void *upb_seq_next(const void *s, const void *iter,
-                                const upb_fielddef *f) {
-  assert(f->accessor);
-  assert(!upb_seq_done(iter));
-  return f->accessor->seqnext(s, iter);
-}
-INLINE upb_value upb_seq_get(const void *iter, const upb_fielddef *f) {
-  assert(f->accessor);
-  assert(!upb_seq_done(iter));
-  return f->accessor->seqget(iter);
-}
-
-INLINE bool upb_msg_has_named(const void *m, const upb_msgdef *md,
-                              const char *field_name) {
-  const upb_fielddef *f = upb_msgdef_ntof(md, field_name);
-  return f && upb_msg_has(m, f);
-}
-
-INLINE bool upb_msg_get_named(const void *m, const upb_msgdef *md,
-                                   const char *field_name, upb_value *val) {
-  const upb_fielddef *f = upb_msgdef_ntof(md, field_name);
-  if (!f) return false;
-  *val = upb_msg_get(m, f);
-  return true;
-}
-
-// Value writers for every in-memory type: write the data to a known offset
-// from the closure "c."
-//
-// TODO(haberman): instead of having standard writer functions, should we have
-// a bool in the accessor that says "write raw value to the field's offset"?
-upb_flow_t upb_stdmsg_setint64(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setint32(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setuint64(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setuint32(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setdouble(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setfloat(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setbool(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setptr(void *c, upb_value fval, upb_value val);
-
-#ifdef __cplusplus
-}  /* extern "C" */
-#endif
-
-#endif
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@ -5,17 +5,13 @@
 * Author: Josh Haberman <jhaberman@gmail.com>
 */

+#include <inttypes.h>
 #include <stddef.h>
 #include <stdlib.h>
 #include "upb/bytestream.h"
-#include "upb/msg.h"
 #include "upb/pb/decoder.h"
 #include "upb/pb/varint.h"

-#ifndef UINT32_MAX
-#define UINT32_MAX 0xffffffff
-#endif
-
 typedef struct {
  uint8_t native_wire_type;
  bool is_numeric;
@ -62,11 +58,12 @@ static const upb_decoder_typeinfo upb_decoder_types[] = {
 #include "upb/pb/decoder_x64.h"
 #endif

-upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit) {
+upb_decoderplan *upb_decoderplan_new(const upb_handlers *h, bool allowjit) {
+  UPB_UNUSED(allowjit);
  upb_decoderplan *p = malloc(sizeof(*p));
+  assert(upb_handlers_isfrozen(h));
  p->handlers = h;
-  upb_handlers_ref(h);
-  h->should_jit = allowjit;
+  upb_handlers_ref(h, p);
 #ifdef UPB_USE_JIT_X64
  p->jit_code = NULL;
  if (allowjit) upb_decoderplan_makejit(p);
@ -76,7 +73,7 @@ upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit) {

 void upb_decoderplan_unref(upb_decoderplan *p) {
  // TODO: make truly refcounted.
-  upb_handlers_unref(p->handlers);
+  upb_handlers_unref(p->handlers, p);
 #ifdef UPB_USE_JIT_X64
  if (p->jit_code) upb_decoderplan_freejit(p);
 #endif
@ -100,8 +97,8 @@ bool upb_decoderplan_hasjitcode(upb_decoderplan *p) {
 // configuration.  But empirically on a Core i7, performance increases 30-50%
 // with these annotations.  Every instance where these appear, gcc 4.2.1 made
 // the wrong decision and degraded performance in benchmarks.
-#define FORCEINLINE static __attribute__((__always_inline__))
-#define NOINLINE static __attribute__((__noinline__))
+#define FORCEINLINE static inline __attribute__((always_inline))
+#define NOINLINE static __attribute__((noinline))

 UPB_NORETURN static void upb_decoder_exitjmp(upb_decoder *d) {
  // Resumable decoder would back out to completed_ptr (and possibly get a
@ -141,14 +138,23 @@ uint64_t upb_decoder_bufendofs(upb_decoder *d) {
  return d->bufstart_ofs + (d->end - d->buf);
 }

+// Reports whether the decoder's frame stack is in a state where the input may
+// end: either only the bottom frame is active, or the single frame above it is
+// a sequence that is not packed.
+static bool upb_decoder_islegalend(upb_decoder *d) {
+  const upb_decoder_frame *top = d->top;
+  if (top == d->stack) return true;
+  return top == d->stack + 1 && top->is_sequence && !top->is_packed;
+}
+
+// Calculates derived values that we cache for speed.  These reflect a
+// combination of the current buffer and the stack, so must be called whenever
+// either is updated.
 static void upb_decoder_setmsgend(upb_decoder *d) {
-  upb_dispatcher_frame *f = d->dispatcher.top;
+  upb_decoder_frame *f = d->top;
+  // delimlen: distance from the start of the current buffer to the end of the
+  // top frame; it is only used below when the frame is delimited.
   size_t delimlen = f->end_ofs - d->bufstart_ofs;
   size_t buflen = d->end - d->buf;
   d->delim_end = (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) ?
       d->buf + delimlen : NULL;  // NULL if not in this buf.
   d->top_is_packed = f->is_packed;
-  d->dispatch_table = &d->dispatcher.msgent->fieldtab;
 }

 static void upb_decoder_skiptonewbuf(upb_decoder *d, uint64_t ofs) {
@ -201,11 +207,11 @@ static void upb_pullbuf(upb_decoder *d) {
  if (!upb_trypullbuf(d)) upb_decoder_abortjmp(d, "Unexpected EOF");
 }

-void upb_decoder_checkpoint(upb_decoder *d) {
+// Tells the input byteregion that everything before the decoder's current
+// offset can be discarded.
+static void upb_decoder_checkpoint(upb_decoder *d) {
   upb_byteregion_discard(d->input, upb_decoder_offset(d));
 }

-void upb_decoder_discardto(upb_decoder *d, uint64_t ofs) {
+static void upb_decoder_discardto(upb_decoder *d, uint64_t ofs) {
  if (ofs <= upb_decoder_bufendofs(d)) {
    upb_decoder_advance(d, ofs - upb_decoder_offset(d));
  } else {
@ -214,7 +220,7 @@ void upb_decoder_discardto(upb_decoder *d, uint64_t ofs) {
  upb_decoder_checkpoint(d);
 }

-void upb_decoder_discard(upb_decoder *d, size_t bytes) {
+// Discards the next "bytes" bytes of input, measured from the decoder's
+// current offset, by delegating to upb_decoder_discardto().
+static void upb_decoder_discard(upb_decoder *d, size_t bytes) {
   upb_decoder_discardto(d, upb_decoder_offset(d) + bytes);
 }

@ -259,7 +265,7 @@ done:
 // Returns true on success or false if we've hit a valid EOF.
 FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) {
  if (upb_decoder_bufleft(d) == 0 &&
-      upb_dispatcher_islegalend(&d->dispatcher) &&
+      upb_decoder_islegalend(d) &&
      !upb_trypullbuf(d)) {
    return false;
  }
@ -319,21 +325,45 @@ FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) {
  return u64;  // TODO: proper byte swapping for big-endian machines.
 }

-INLINE upb_byteregion *upb_decode_string(upb_decoder *d) {
-  uint32_t strlen = upb_decode_varint32(d);
-  uint64_t offset = upb_decoder_offset(d);
-  if (offset + strlen > upb_byteregion_endofs(d->input))
-    upb_decoder_abortjmp(d, "Unexpected EOF");
-  upb_byteregion_reset(&d->str_byteregion, d->input, offset, strlen);
-  // Could make it an option on the callback whether we fetchall() first or not.
-  if (upb_byteregion_fetchall(&d->str_byteregion) != UPB_BYTE_OK)
-    upb_decoder_abortjmp(d, "Couldn't fetchall() on string.");
-  upb_decoder_discardto(d, offset + strlen);
-  return &d->str_byteregion;
+// Pushes a decoder frame for the submessage or group field "f" and notifies
+// the sink that the submessage starts.  "end" is the stream offset at which
+// the submessage ends, or UPB_NONDELIMITED for a group, in which case the
+// field number is remembered so the matching ENDGROUP tag can be recognized.
+// Aborts decoding if the frame stack is full or the sink rejects the
+// submessage.
+INLINE void upb_push_msg(upb_decoder *d, const upb_fielddef *f, uint64_t end) {
+  upb_decoder_frame *fr = d->top + 1;
+  // d->limit is set to &d->stack[UPB_MAX_NESTING] and is treated as exclusive
+  // (">=", as the dispatcher this code replaces did), so "fr" can never be a
+  // frame at or beyond it.  The depth is checked first so the sink is not
+  // told about a submessage that cannot be pushed.
+  if (fr >= d->limit || !upb_sink_startsubmsg(&d->sink, f)) {
+    upb_decoder_abortjmp(d, "Nesting too deep.");
+  }
+  fr->f = f;
+  fr->is_sequence = false;
+  fr->is_packed = false;
+  fr->end_ofs = end;
+  fr->group_fieldnum = end == UPB_NONDELIMITED ?
+      (int32_t)upb_fielddef_number(f) : -1;
+  d->top = fr;
+  upb_decoder_setmsgend(d);
 }

-INLINE void upb_push_msg(upb_decoder *d, upb_fhandlers *f, uint64_t end) {
-  upb_dispatch_startsubmsg(&d->dispatcher, f)->end_ofs = end;
+// Pushes a decoder frame for a sequence of the repeated field "f" and notifies
+// the sink that the sequence starts.  "packed" records whether the elements
+// arrive as one packed block; "end_ofs" is the stream offset at which the
+// frame ends.  Aborts decoding if the frame stack is full or the sink rejects
+// the sequence.
+INLINE void upb_push_seq(upb_decoder *d, const upb_fielddef *f, bool packed,
+                         uint64_t end_ofs) {
+  upb_decoder_frame *fr = d->top + 1;
+  // d->limit is set to &d->stack[UPB_MAX_NESTING] and is treated as exclusive
+  // (">=", as the dispatcher this code replaces did), so "fr" can never be a
+  // frame at or beyond it.  The depth is checked first so the sink is not
+  // told about a sequence that cannot be pushed.
+  if (fr >= d->limit || !upb_sink_startseq(&d->sink, f)) {
+    upb_decoder_abortjmp(d, "Nesting too deep.");
+  }
+  fr->f = f;
+  fr->is_sequence = true;
+  fr->group_fieldnum = -1;
+  fr->is_packed = packed;
+  fr->end_ofs = end_ofs;
+  d->top = fr;
+  upb_decoder_setmsgend(d);
+}
+
+// Ends the submessage frame on top of the stack: notifies the sink, pops the
+// frame, and recomputes the cached end-of-message state for the new top.
+INLINE void upb_pop_submsg(upb_decoder *d) {
+  upb_sink_endsubmsg(&d->sink, d->top->f);
+  d->top--;
+  upb_decoder_setmsgend(d);
+}
+
+// Ends the sequence frame on top of the stack: notifies the sink, pops the
+// frame, and recomputes the cached end-of-message state for the new top.
+INLINE void upb_pop_seq(upb_decoder *d) {
+  upb_sink_endseq(&d->sink, d->top->f);
+  d->top--;
   upb_decoder_setmsgend(d);
 }

@ -344,13 +374,14 @@ INLINE void upb_push_msg(upb_decoder *d, upb_fhandlers *f, uint64_t end) {
 // properly sign-extended.  We could detect this and error about the data loss,
 // but proto2 does not do this, so we pass.

-#define T(type, wt, valtype, convfunc) \
-  INLINE void upb_decode_ ## type(upb_decoder *d, upb_fhandlers *f) { \
-    upb_value val; \
-    upb_value_set ## valtype(&val, (convfunc)(upb_decode_ ## wt(d))); \
-    upb_dispatch_value(&d->dispatcher, f, val); \
+#define T(type, wt, name, convfunc) \
+  INLINE void upb_decode_ ## type(upb_decoder *d, const upb_fielddef *f) { \
+    upb_sink_put ## name(&d->sink, f, (convfunc)(upb_decode_ ## wt(d))); \
  } \

+// Reinterpret the bits of a fixed-width wire value as a double/float by
+// copying its bytes (no numeric conversion takes place).
+static double  upb_asdouble(uint64_t n) { double d; memcpy(&d, &n, 8); return d; }
+static float   upb_asfloat(uint32_t n)  { float  f; memcpy(&f, &n, 4); return f; }
+
 T(INT32,    varint,  int32,  int32_t)
 T(INT64,    varint,  int64,  int64_t)
 T(UINT32,   varint,  uint32, uint32_t)
@ -361,43 +392,44 @@ T(SFIXED32, fixed32, int32,  int32_t)
 T(SFIXED64, fixed64, int64,  int64_t)
 T(BOOL,     varint,  bool,   bool)
 T(ENUM,     varint,  int32,  int32_t)
+T(DOUBLE,   fixed64, double, upb_asdouble)
+T(FLOAT,    fixed32, float,  upb_asfloat)
 T(SINT32,   varint,  int32,  upb_zzdec_32)
 T(SINT64,   varint,  int64,  upb_zzdec_64)
-T(STRING,   string,  byteregion, upb_byteregion*)
-
 #undef T

-INLINE void upb_decode_DOUBLE(upb_decoder *d, upb_fhandlers *f) {
-  upb_value val;
-  double dbl;
-  uint64_t wireval = upb_decode_fixed64(d);
-  memcpy(&dbl, &wireval, 8);
-  upb_value_setdouble(&val, dbl);
-  upb_dispatch_value(&d->dispatcher, f, val);
-}
-
-INLINE void upb_decode_FLOAT(upb_decoder *d, upb_fhandlers *f) {
-  upb_value val;
-  float flt;
-  uint64_t wireval = upb_decode_fixed32(d);
-  memcpy(&flt, &wireval, 4);
-  upb_value_setfloat(&val, flt);
-  upb_dispatch_value(&d->dispatcher, f, val);
-}
-
-static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) {
+// Starts a group for field "f": pushes a submessage frame with no known end
+// offset (UPB_NONDELIMITED).
+static void upb_decode_GROUP(upb_decoder *d, const upb_fielddef *f) {
   upb_push_msg(d, f, UPB_NONDELIMITED);
 }
-static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) {
-  (void)f;
-  upb_dispatch_endsubmsg(&d->dispatcher);
-  upb_decoder_setmsgend(d);
-}
-static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) {
+
+// Starts a length-delimited submessage for field "f": reads the varint length
+// and pushes a frame that ends that many bytes past the current offset.
+static void upb_decode_MESSAGE(upb_decoder *d, const upb_fielddef *f) {
   uint32_t len = upb_decode_varint32(d);
   upb_push_msg(d, f, upb_decoder_offset(d) + len);
 }

+// Decodes a length-delimited string for field "f" and streams it to the sink:
+// startstr with the total length, one putstring call per chunk that is
+// available in the input, then endstr.  Aborts decoding if the declared length
+// runs past the end of the input region, or if the sink reports consuming more
+// bytes than remain in the string.
+static void upb_decode_STRING(upb_decoder *d, const upb_fielddef *f) {
+  uint32_t remaining = upb_decode_varint32(d);
+  uint64_t pos = upb_decoder_offset(d);
+  if (pos + remaining > upb_byteregion_endofs(d->input)) {
+    upb_decoder_abortjmp(d, "Unexpected EOF");
+  }
+  upb_sink_startstr(&d->sink, f, remaining);
+  while (remaining > 0) {
+    // Fetch more input when nothing is buffered at the current position.
+    if (upb_byteregion_available(d->input, pos) == 0) {
+      upb_pullbuf(d);
+    }
+    size_t chunk;
+    const char *data = upb_byteregion_getptr(d->input, pos, &chunk);
+    // Never hand the sink bytes that lie beyond the end of this string.
+    chunk = UPB_MIN(chunk, remaining);
+    chunk = upb_sink_putstring(&d->sink, f, data, chunk);
+    if (chunk > remaining) {
+      upb_decoder_abortjmp(d, "Skipped too many bytes.");
+    }
+    pos += chunk;
+    remaining -= chunk;
+    upb_decoder_discardto(d, pos);
+  }
+  upb_sink_endstr(&d->sink, f);
+}
+

 /* The main decoding loop *****************************************************/

@ -410,33 +442,33 @@ static void upb_decoder_checkdelim(upb_decoder *d) {
  // handler).
  while (d->delim_end != NULL && d->ptr >= d->delim_end) {
    if (d->ptr > d->delim_end) upb_decoder_abortjmp(d, "Bad submessage end");
-    if (d->dispatcher.top->is_sequence) {
-      upb_dispatch_endseq(&d->dispatcher);
+    if (d->top->is_sequence) {
+      upb_pop_seq(d);
    } else {
-      upb_dispatch_endsubmsg(&d->dispatcher);
+      upb_pop_submsg(d);
    }
-    upb_decoder_setmsgend(d);
  }
 }

-INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
+INLINE const upb_fielddef *upb_decode_tag(upb_decoder *d) {
  while (1) {
    uint32_t tag;
    if (!upb_trydecode_varint32(d, &tag)) return NULL;
    uint8_t wire_type = tag & 0x7;
-    uint32_t fieldnum = tag >> 3;
-    const upb_value *val = upb_inttable_lookup32(d->dispatch_table, fieldnum);
-    upb_fhandlers *f = val ? upb_value_getptr(*val) : NULL;
-    bool is_packed = false;
+    uint32_t fieldnum = tag >> 3; const upb_fielddef *f = NULL;
+    const upb_handlers *h = upb_sink_tophandlers(&d->sink);
+    f = upb_msgdef_itof(upb_handlers_msgdef(h), fieldnum);
+    bool packed = false;

    if (f) {
      // Wire type check.
-      if (wire_type == upb_decoder_types[f->type].native_wire_type) {
+      upb_fieldtype_t type = upb_fielddef_type(f);
+      if (wire_type == upb_decoder_types[type].native_wire_type) {
        // Wire type is ok.
      } else if ((wire_type == UPB_WIRE_TYPE_DELIMITED &&
-                 upb_decoder_types[f->type].is_numeric)) {
+                 upb_decoder_types[type].is_numeric)) {
        // Wire type is ok (and packed).
-        is_packed = true;
+        packed = true;
      } else {
        f = NULL;
      }
@ -445,29 +477,24 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
    // There are no explicit "startseq" or "endseq" markers in protobuf
    // streams, so we have to infer them by noticing when a repeated field
    // starts or ends.
-    upb_dispatcher_frame *fr = d->dispatcher.top;
+    upb_decoder_frame *fr = d->top;
    if (fr->is_sequence && fr->f != f) {
-      upb_dispatch_endseq(&d->dispatcher);
-      upb_decoder_setmsgend(d);
-      fr = d->dispatcher.top;
+      upb_pop_seq(d);
+      fr = d->top;
    }
-    if (f && f->repeated && !fr->is_sequence) {
-      upb_dispatcher_frame *fr2 = upb_dispatch_startseq(&d->dispatcher, f);
-      if (is_packed) {
-        // Packed primitive field.
+
+    if (f && upb_fielddef_isseq(f) && !fr->is_sequence) {
+      if (packed) {
        uint32_t len = upb_decode_varint32(d);
-        fr2->end_ofs = upb_decoder_offset(d) + len;
-        fr2->is_packed = true;
+        upb_push_seq(d, f, true, upb_decoder_offset(d) + len);
      } else {
-        // Non-packed field -- this tag pertains to only a single message.
-        fr2->end_ofs = fr->end_ofs;
+        upb_push_seq(d, f, false, fr->end_ofs);
      }
-      upb_decoder_setmsgend(d);
    }

    if (f) return f;

-    // Unknown field.
+    // Unknown field or ENDGROUP.
    if (fieldnum == 0 || fieldnum > UPB_MAX_FIELDNUMBER)
      upb_decoder_abortjmp(d, "Invalid field number");
    switch (wire_type) {
@ -479,7 +506,12 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
      case UPB_WIRE_TYPE_START_GROUP:
        upb_decoder_abortjmp(d, "Can't handle unknown groups yet");
      case UPB_WIRE_TYPE_END_GROUP:
-        upb_decoder_abortjmp(d, "Unmatched ENDGROUP tag");
+        if (fieldnum != fr->group_fieldnum)
+          upb_decoder_abortjmp(d, "Unmatched ENDGROUP tag");
+        upb_sink_endsubmsg(&d->sink, fr->f);
+        d->top--;
+        upb_decoder_setmsgend(d);
+        break;
      default:
        upb_decoder_abortjmp(d, "Invalid wire type");
    }
@ -495,30 +527,30 @@ upb_success_t upb_decoder_decode(upb_decoder *d) {
    assert(!upb_ok(&d->status));
    return UPB_ERROR;
  }
-  upb_dispatch_startmsg(&d->dispatcher);
+  upb_sink_startmsg(&d->sink);
  // Prime the buf so we can hit the JIT immediately.
  upb_trypullbuf(d);
-  upb_fhandlers *f = d->dispatcher.top->f;
+  const upb_fielddef *f = d->top->f;
  while(1) {
-    upb_decoder_checkdelim(d);
 #ifdef UPB_USE_JIT_X64
    upb_decoder_enterjit(d);
    upb_decoder_checkpoint(d);
+    upb_decoder_setmsgend(d);
 #endif
+    upb_decoder_checkdelim(d);
    if (!d->top_is_packed) f = upb_decode_tag(d);
    if (!f) {
      // Successful EOF.  We may need to dispatch a top-level implicit frame.
-      if (d->dispatcher.top->is_sequence) {
-        assert(d->dispatcher.top == d->dispatcher.stack + 1);
-        upb_dispatch_endseq(&d->dispatcher);
+      if (d->top->is_sequence) {
+        assert(d->sink.top == d->sink.stack + 1);
+        upb_pop_seq(d);
      }
-      assert(d->dispatcher.top == d->dispatcher.stack);
-      upb_dispatch_endmsg(&d->dispatcher, &d->status);
+      assert(d->top == d->stack);
+      upb_sink_endmsg(&d->sink, &d->status);
      return UPB_OK;
    }

-    switch (f->type) {
-      case UPB_TYPE_ENDGROUP:  upb_endgroup(d, f);        break;
+    switch (upb_fielddef_type(f)) {
      case UPB_TYPE(DOUBLE):   upb_decode_DOUBLE(d, f);   break;
      case UPB_TYPE(FLOAT):    upb_decode_FLOAT(d, f);    break;
      case UPB_TYPE(INT64):    upb_decode_INT64(d, f);    break;
@ -545,28 +577,29 @@ upb_success_t upb_decoder_decode(upb_decoder *d) {

 void upb_decoder_init(upb_decoder *d) {
   upb_status_init(&d->status);
-  upb_dispatcher_init(&d->dispatcher, &d->status, &upb_decoder_exitjmp2, d);
   d->plan = NULL;
   d->input = NULL;
+  // Frame-stack bound that the push functions compare new frames against.
+  d->limit = &d->stack[UPB_MAX_NESTING];
 }

-void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p, int msg_offset) {
-  assert(msg_offset >= 0);
-  assert(msg_offset < p->handlers->msgs_len);
+// Points the decoder at plan "p", clears any previously set input, and
+// initializes the decoder's sink with the plan's handlers.
+void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p) {
   d->plan = p;
-  d->msg_offset = msg_offset;
   d->input = NULL;
+  upb_sink_init(&d->sink, p->handlers);
 }

 void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input,
-                            void *closure) {
+                            void *c) {
  assert(d->plan);
-  upb_dispatcher_frame *f =
-      upb_dispatcher_reset(&d->dispatcher, closure, d->plan->handlers->msgs[0]);
  upb_status_clear(&d->status);
-  f->end_ofs = UPB_NONDELIMITED;
+  upb_sink_reset(&d->sink, c);
  d->input = input;
-  d->str_byteregion.bytesrc = input->bytesrc;
+
+  d->top = d->stack;
+  d->top->is_sequence = false;
+  d->top->is_packed = false;
+  d->top->group_fieldnum = UINT32_MAX;
+  d->top->end_ofs = UPB_NONDELIMITED;

  // Protect against assert in skiptonewbuf().
  d->bufstart_ofs = 0;
@ -576,6 +609,5 @@ void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input,
 }

 void upb_decoder_uninit(upb_decoder *d) {
-  upb_dispatcher_uninit(&d->dispatcher);
  upb_status_uninit(&d->status);  // The status is the only member torn down here.
 }
--- a/upb/pb/decoder.h
+++ b/upb/pb/decoder.h
@ -13,9 +13,8 @@
 #define UPB_DECODER_H_

 #include <setjmp.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include "upb/handlers.h"
+#include "upb/bytestream.h"
+#include "upb/sink.h"

 #ifdef __cplusplus
 extern "C" {
@ -34,9 +33,12 @@ extern "C" {
 struct _upb_decoderplan;
 typedef struct _upb_decoderplan upb_decoderplan;

-// TODO: add parameter for a list of other decoder plans that we can share
-// generated code with.
-upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit);
+// TODO(haberman):
+// - add support for letting any message in the plan be at the top level.
+// - make this object a handlers instead (when bytesrc/bytesink are merged
+//   into handlers).
+// - add support for sharing code with previously-built plans/handlers.
+upb_decoderplan *upb_decoderplan_new(const upb_handlers *h, bool allowjit);
 void upb_decoderplan_unref(upb_decoderplan *p);

 // Returns true if the plan contains JIT-ted code.  This may not be the same as
@ -49,15 +51,28 @@ bool upb_decoderplan_hasjitcode(upb_decoderplan *p);

 struct dasm_State;

+typedef struct {  // One entry of the decoder's internal frame stack.
+  const upb_fielddef *f;    // Field this frame was pushed for.
+  uint64_t end_ofs;         // UPB_NONDELIMITED for the top-level frame and for groups.
+  uint32_t group_fieldnum;  // UINT32_MAX for non-groups.
+  bool is_sequence;   // frame represents seq or submsg? (f might be both).
+  bool is_packed;     // !upb_issubmsg(f) && end_ofs != UINT64_MAX
+                      // (strings aren't pushed).
+} upb_decoder_frame;
+
 typedef struct _upb_decoder {
  upb_decoderplan *plan;
-  int             msg_offset;      // Which message from the plan is top-level.
  upb_byteregion  *input;          // Input data (serialized), not owned.
-  upb_dispatcher  dispatcher;      // Dispatcher to which we push parsed data.
  upb_status      status;          // Where we store errors that occur.
-  upb_byteregion  str_byteregion;  // For passing string data to callbacks.

-  upb_inttable    *dispatch_table;
+  // Where we push parsed data.
+  // TODO(haberman): make this a pointer and make upb_decoder_resetinput() take
+  // one of these instead of a void*.
+  upb_sink        sink;
+
+  // Our internal stack.
+  upb_decoder_frame *top, *limit;
+  upb_decoder_frame stack[UPB_MAX_NESTING];

  // Current input buffer and its stream offset.
  const char *buf, *ptr, *end;
@ -70,7 +85,11 @@ typedef struct _upb_decoder {

 #ifdef UPB_USE_JIT_X64
  // For JIT, which doesn't do bounds checks in the middle of parsing a field.
-  const char *jit_end, *effective_end;  // == MIN(jit_end, submsg_end)
+  const char *jit_end, *effective_end;  // == MIN(jit_end, delim_end)
+
+  // Used momentarily by the generated code to store a value while a user
+  // function is called.
+  uint32_t tmp_len;
 #endif

  // For exiting the decoder on error.
@ -88,7 +107,7 @@ void upb_decoder_uninit(upb_decoder *d);
 // must live until the decoder is destroyed or reset to a different plan.
 //
 // Must be called before upb_decoder_resetinput() or upb_decoder_decode().
-void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p, int msg_offset);
+void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p);

 // Resets the input of an already-allocated decoder.  This puts it in a state
 // where it has not seen any data, and expects the next data to be from the
@ -111,7 +130,8 @@ INLINE const upb_status *upb_decoder_status(upb_decoder *d) {
 // Implementation details

 struct _upb_decoderplan {
-  upb_handlers *handlers;  // owns reference.
+  // The top-level handlers that this plan calls into.  We own a ref.
+  const upb_handlers *handlers;

 #ifdef UPB_USE_JIT_X64
  // JIT-generated machine code (else NULL).
@ -119,8 +139,23 @@ struct _upb_decoderplan {
  size_t jit_size;
  char *debug_info;

+  // For storing upb_jitmsginfo, which contains per-msg runtime data needed
+  // by the JIT.
+  // Maps upb_handlers* -> upb_jitmsginfo.
+  upb_inttable msginfo;
+
+  // The following members are used only while the JIT is being built.
+
  // This pointer is allocated by dasm_init() and freed by dasm_free().
  struct dasm_State *dynasm;
+
+  // For storing pclabel bases while we are building the JIT.
+  // Maps (upb_handlers* or upb_fielddef*) -> int32 pclabel_base
+  upb_inttable pclabels;
+
+  // This is not the same as len(pclabels) because the table only contains base
+  // offsets for each def, but each def can have many pclabels.
+  uint32_t pclabel_count;
 #endif
 };

--- a/upb/pb/decoder_x64.dasc
+++ b/upb/pb/decoder_x64.dasc
@ -12,6 +12,7 @@
 |// function) we must respect alignment rules.  All x86-64 systems require
 |// 16-byte stack alignment.

+#include <stdio.h>
 #include <sys/mman.h>
 #include "dynasm/dasm_x86.h"

@ -28,6 +29,44 @@
 #define MAP_32BIT 0
 #endif

+// These are used to track jump targets for messages and fields.
+enum {  // Per-message labels; each is an offset from the message's pclabel base.
+  STARTMSG = 0,        // Message entry point; runs the startmsg handler, if any.
+  AFTER_STARTMSG = 1,  // Message entry point that skips the startmsg handler.
+  ENDOFBUF = 2,        // Taken by "checkpoint" once PTR reaches effective_end.
+  ENDOFMSG = 3,        // Runs the endmsg handler, if any.
+  DYNDISPATCH = 4,     // Table dispatch on the field number of the next tag.
+  TOTAL_MSG_PCLABELS = 5,  // Number of per-message labels above.
+};
+
+enum {  // Per-field labels; each is an offset from the field's pclabel base.
+  FIELD = 0,               // Field entry point that first checks the wire type.
+  FIELD_NO_TYPECHECK = 1,  // Field entry point without the wire type check.
+  TOTAL_FIELD_PCLABELS = 2,  // Number of per-field labels above.
+};
+
+typedef struct {  // Per-message data for the JIT, looked up with upb_getmsginfo().
+  uint32_t max_field_number;  // Dispatch bounds check: larger field numbers exit the JIT.
+  // Jump table, currently keyed on field number.  Could also try keying it
+  // on encoded or decoded tag, or on encoded field number.
+  void **tablearray;
+  // Pointer to the JIT code for parsing this message.
+  void *jit_func;
+} upb_jitmsginfo;
+
+static uint32_t upb_getpclabel(upb_decoderplan *plan, const void *obj, int n) {
+  const upb_value *v = upb_inttable_lookupptr(&plan->pclabels, obj);  // obj: a upb_handlers* or upb_fielddef*.
+  assert(v);  // obj must already have a pclabel base in the table.
+  return upb_value_getuint32(*v) + n;  // Base for obj plus the label offset n.
+}
+
+static upb_jitmsginfo *upb_getmsginfo(upb_decoderplan *plan,
+                                      const upb_handlers *h) {
+  const upb_value *v = upb_inttable_lookupptr(&plan->msginfo, h);  // msginfo is keyed on the handlers pointer.
+  assert(v);  // h must already have an entry in plan->msginfo.
+  return upb_value_getptr(*v);  // The stored upb_jitmsginfo for h.
+}
+
 // To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code
 // at runtime.  GDB 7.x+ has defined an interface for doing this, and these
 // structure/function definitions are copied out of gdb/jit.h
@ -66,7 +105,9 @@ typedef struct {

 gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL};

-void __attribute__((noinline)) __jit_debug_register_code() { __asm__ __volatile__(""); }
+void __attribute__((noinline)) __jit_debug_register_code() {
+  __asm__ __volatile__("");
+}

 void upb_reg_jit_gdb(upb_decoderplan *plan) {
  // Create debug info.
@ -120,7 +161,8 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; }
 |.define ARG3_32,   edx
 |.define ARG3_64,   rdx
 |.define ARG4_64,   rcx
-|.define ARG5_32,   r8d
+|.define XMMARG1,   xmm0
+
 |
 |// Register allocation / type map.
 |// ALL of the code in this file uses these register allocations.
@ -128,13 +170,15 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; }
 |// conventions, but of course when calling to user callbacks we must.
 |.define PTR,       rbx  // Writing this to DECODER->ptr commits our progress.
 |.define CLOSURE,   r12
-|.type   FRAME,     upb_dispatcher_frame, r13
-|.type   BYTEREGION,upb_byteregion, r14
+|.type   SINKFRAME, upb_sink_frame, r13
+|.type   FRAME,     upb_decoder_frame, r14
 |.type   DECODER,   upb_decoder, r15
-|.type   STDARRAY,  upb_stdarray
 |
 |.macro callp, addr
 || upb_assert_notnull(addr);
+|// TODO(haberman): fix this.  I believe the predicate we should actually be
+|// testing is whether the jump distance is greater than INT32_MAX, not the
+|// absolute address of the target.
 || if ((uintptr_t)addr < 0xffffffff) {
     |  call   &addr
 || } else {
@ -143,14 +187,22 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; }
 || }
 |.endmacro
 |
-|// Checks PTR for end-of-buffer.
-|.macro check_eob, m
+|// Checkpoints our progress by writing PTR to DECODER, and
+|// checks for end-of-buffer.
+|.macro checkpoint, h
+|  mov   DECODER->ptr, PTR
 |  cmp   PTR, DECODER->effective_end
-|| if (m->is_group) {
-     |  jae  ->exit_jit
-|| } else {
-     |  jae  =>m->jit_endofbuf_pclabel
-|| }
+|  jae   =>upb_getpclabel(plan, h, ENDOFBUF)
+|.endmacro
+|
+|.macro check_bool_ret
+|  test  al, al
+|  jz    ->exit_jit
+|.endmacro
+|
+|.macro check_ptr_ret
+|  test  rax, rax
+|  jz    ->exit_jit
 |.endmacro
 |
 |// Decodes varint from [PTR + offset] -> ARG3.
@ -172,8 +224,7 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; }
 |  mov    ARG1_64, rax
 |  mov    ARG2_32, ARG3_32
 |  callp  upb_vdecode_max8_fast
-|  test   rax, rax
-|  jz     ->exit_jit   // >10-byte varint.
+|  check_ptr_ret  // Check for unterminated, >10-byte varint.
 |9:
 |.endmacro
 |
@ -187,74 +238,103 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; }
 |// Could specialize this by avoiding the value masking: could just key the
 |// table on the raw (length-masked) varint to save 3-4 cycles of latency.
 |// Currently only support tables where all entries are in the array part.
-|.macro dyndispatch_, m
-|=>m->jit_dyndispatch_pclabel:
+|.macro dyndispatch_, h
+|=>upb_getpclabel(plan, h, DYNDISPATCH):
 |  decode_loaded_varint, 0
 |  mov  ecx, edx
 |  shr  ecx, 3
-|  and  edx, 0x7   // For the type check that will happen later.
-|  cmp  ecx, m->max_field_number  // Bounds-check the field.
-|  ja   ->exit_jit                // In the future; could be unknown label
-|| if ((uintptr_t)m->tablearray < 0xffffffff) {
+|  and  edx, 0x7   // Note: this value is used in the FIELD pclabel below.
+|  cmp  edx, UPB_WIRE_TYPE_END_GROUP
+|  je   >1
+|| upb_jitmsginfo *mi = upb_getmsginfo(plan, h);
+|  cmp  ecx, mi->max_field_number  // Bounds-check the field.
+|  ja   ->exit_jit                 // In the future; could be unknown label
+|| if ((uintptr_t)mi->tablearray < 0xffffffff) {
 |    // TODO: support hybrid array/hash tables.
-|    mov  rax, qword [rcx*8 + m->tablearray]
+|    mov  rax, qword [rcx*8 + mi->tablearray]
 || } else {
-|    mov64  rax, (uintptr_t)m->tablearray
+|    mov64  rax, (uintptr_t)mi->tablearray
 |    mov  rax, qword [rax + rcx*8]
 || }
 |  jmp  rax  // Dispatch: unpredictable jump.
+|1:
+|// End group.
+|  cmp  ecx, FRAME->group_fieldnum
+|  jne  ->exit_jit         // Unexpected END_GROUP tag.
+|  mov  PTR, rax   // rax came from decode_loaded_varint
+|  mov  DECODER->ptr, PTR
+|  jmp  =>upb_getpclabel(plan, h, ENDOFMSG)
 |.endmacro
 |
 |.if 1
 |  // Replicated dispatch: larger code, but better branch prediction.
 |  .define dyndispatch, dyndispatch_
 |.else
-|  .macro dyndispatch, m
-|    jmp =>m->jit_dyndispatch_pclabel
+|  // Single dispatch: smaller code, could be faster because of reduced
+|  // icache usage.  We keep this around to allow for easy comparison between
+|  // the two.
+|  .macro dyndispatch, h
+|    jmp =>upb_getpclabel(plan, h, DYNDISPATCH)
 |  .endmacro
 |.endif
 |
 |// Push a stack frame (not the CPU stack, the upb_decoder stack).
-|.macro pushframe, f, end_offset_, is_sequence_
-|  lea   rax, [FRAME + sizeof(upb_dispatcher_frame)]  // rax for shorter addressing.
-|  cmp   rax, qword DECODER->dispatcher.limit
+|.macro pushframe, h, field, end_offset_, endtype
+|// Decoder Frame.
+|  lea   rax, [FRAME + sizeof(upb_decoder_frame)]  // rax for short addressing
+|  cmp   rax, DECODER->limit
 |  jae   ->exit_jit  // Frame stack overflow.
-|  mov64 r8, (uintptr_t)f
-|  mov   qword FRAME:rax->f, r8
+|  mov64 r10, (uintptr_t)field
+|  mov   FRAME:rax->f, r10
 |  mov   qword FRAME:rax->end_ofs, end_offset_
-|  mov   byte FRAME:rax->is_sequence, is_sequence_
-|  mov   DECODER->dispatcher.top, rax
+|  mov   byte FRAME:rax->is_sequence, (endtype == UPB_HANDLER_ENDSEQ)
+|  mov   byte FRAME:rax->is_packed, 0
+|| if (upb_fielddef_type(field) == UPB_TYPE_GROUP &&
+||     endtype == UPB_HANDLER_ENDSUBMSG) {
+|    mov dword FRAME:rax->group_fieldnum, upb_fielddef_number(field)
+|| } else {
+|    mov dword FRAME:rax->group_fieldnum, 0xffffffff
+|| }
+|  mov   DECODER->top, rax
 |  mov   FRAME, rax
+|// Sink Frame.
+|  lea   rcx, [SINKFRAME + sizeof(upb_sink_frame)]  // rcx for short addressing
+|  cmp   rcx, DECODER->sink.limit
+|  jae   ->exit_jit  // Frame stack overflow.
+|  mov   dword SINKFRAME:rcx->end, getselector(field, endtype)
+|| if (upb_fielddef_issubmsg(field)) {
+|    mov64 r9, (uintptr_t)upb_handlers_getsubhandlers(h, field)
+|| } else {
+|    mov64 r9, (uintptr_t)h
+|| }
+|  mov   SINKFRAME:rcx->h, r9
+|  mov   DECODER->sink.top, rcx
+|  mov   SINKFRAME, rcx
 |.endmacro
 |
-|.macro popframe, m
-|  sub   FRAME, sizeof(upb_dispatcher_frame)
-|  mov   DECODER->dispatcher.top, FRAME
-|  setmsgend  m
-|  mov   CLOSURE, FRAME->closure
+|.macro popframe
+|  sub   FRAME, sizeof(upb_decoder_frame)
+|  mov   DECODER->top, FRAME
+|  sub   SINKFRAME, sizeof(upb_sink_frame)
+|  mov   DECODER->sink.top, SINKFRAME
+|  setmsgend
+|  mov   CLOSURE, SINKFRAME->closure
 |.endmacro
 |
-|.macro setmsgend, m
-|    mov    rsi, DECODER->jit_end
-|| if (m->is_group) {
-|    mov64  rax, 0xffffffffffffffff
-|    mov    qword DECODER->delim_end, rax
-|    mov    DECODER->effective_end, rsi
-|| } else {
-|    // Could store a correctly-biased version in the frame, at the cost of
-|    // a larger stack.
-|    mov    eax, dword FRAME->end_ofs
-|    add    rax, qword DECODER->buf
-|    mov    DECODER->delim_end, rax  // delim_end = d->buf + f->end_ofs
-|    cmp    rax, rsi
-|    jb     >8
-|    mov    rax, rsi                  // effective_end = min(d->delim_end, d->jit_end)
+|.macro setmsgend
+|  mov   rsi, DECODER->jit_end
+|  mov   rax, qword FRAME->end_ofs  // Will be UINT64_MAX for groups.
+|  sub   rax, qword DECODER->bufstart_ofs
+|  add   rax, qword DECODER->buf  // rax = d->buf + f->end_ofs - d->bufstart_ofs
+|  jc    >8        // If the addition overflowed, use jit_end
+|  cmp   rax, rsi
+|  ja    >8        // If jit_end is less, use jit_end
+|  mov   rsi, rax  // Use frame end.
 |8:
-|    mov    DECODER->effective_end, rax
-|| }
+|  mov   DECODER->effective_end, rsi
 |.endmacro
 |
-|// rax contains the tag, compare it against "tag", but since it is a varint
+|// rcx contains the tag, compare it against "tag", but since it is a varint
 |// we must only compare as many bytes as actually have data.
 |.macro checktag, tag
 || switch (upb_value_size(tag)) {
@ -279,22 +359,6 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; }
 ||  }
 |.endmacro
 |
-|// TODO: optimize for 0 (xor) and 32-bits.
-|.macro loadfval, f
-||#ifndef NDEBUG
-||// Since upb_value carries type information in debug mode
-||// only, we need to pass the arguments slightly differently.
-|    mov ARG3_32, f->fval.type
-||#endif
-|| if (f->fval.val.uint64 == 0) {
-|    xor     ARG2_32, ARG2_32
-|| } else if (f->fval.val.uint64 < 0xffffffff) {
-|    mov     ARG2_32, f->fval.val.uint64
-|| } else {
-|    mov64   ARG2_64, f->fval.val.uint64
-|| }
-|.endmacro
-|
 |.macro sethas, reg, hasbit
 || if (hasbit >= 0) {
 |    or   byte [reg + ((uint32_t)hasbit / 8)], (1 << ((uint32_t)hasbit % 8))
@ -304,14 +368,37 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; }

 #include <stdlib.h>
 #include "upb/pb/varint.h"
-#include "upb/msg.h"
+
+static upb_selector_t getselector(const upb_fielddef *f,
+                                  upb_handlertype_t type) {
+  upb_selector_t selector;
+  bool ok = upb_getselector(f, type, &selector);  // Result comes back through "selector".
+  UPB_ASSERT_VAR(ok, ok);  // NOTE(review): presumably asserts ok while keeping it "used" in NDEBUG builds -- confirm.
+  return selector;
+}
+
+static upb_func *gethandler(const upb_handlers *h, const upb_fielddef *f,
+                            upb_handlertype_t type) {
+  return upb_handlers_gethandler(h, getselector(f, type));  // Callers treat a NULL result as "no handler".
+}
+
+static uintptr_t gethandlerdata(const upb_handlers *h, const upb_fielddef *f,
+                                upb_handlertype_t type) {
+  return (uintptr_t)upb_handlers_gethandlerdata(h, getselector(f, type));  // Returned as an integer; the JIT emits it as a mov64 immediate.
+}

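 // Decodes the next value into ARG3 (XMMARG1 for DOUBLE/FLOAT), advances PTR.
 // STRING/BYTES values are delivered to their string handlers here directly.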
 static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan,
-                                            uint8_t type, size_t tag_size) {
+                                            uint8_t type, size_t tag_size,
+                                            const upb_handlers *h,
+                                            const upb_fielddef *f) {
  // Decode the value into arg 3 for the callback.
  switch (type) {
    case UPB_TYPE(DOUBLE):
+      |  movsd  XMMARG1, qword [PTR + tag_size]
+      |  add    PTR, 8 + tag_size
+      break;
+
    case UPB_TYPE(FIXED64):
    case UPB_TYPE(SFIXED64):
      |  mov  ARG3_64, qword [PTR + tag_size]
@ -319,6 +406,10 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan,
      break;

    case UPB_TYPE(FLOAT):
+      |  movss  XMMARG1, dword [PTR + tag_size]
+      |  add    PTR, 4 + tag_size
+      break;
+
    case UPB_TYPE(FIXED32):
    case UPB_TYPE(SFIXED32):
      |  mov  ARG3_32, dword [PTR + tag_size]
@ -362,7 +453,7 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan,
      break;

    case UPB_TYPE(STRING):
-    case UPB_TYPE(BYTES):
+    case UPB_TYPE(BYTES): {
      // We only handle the case where the entire string is in our current
      // buf, which sidesteps any security problems.  The C path has more
      // robust checks.
@ -372,22 +463,42 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan,
      |  sub  rdi, rax
      |  cmp  ARG3_64, rdi  // if (len > d->end - str)
      |  ja   ->exit_jit    // Can't deliver, whole string not in buf.
+      |  mov  PTR, rax
+
+      upb_func *handler = gethandler(h, f, UPB_HANDLER_STARTSTR);
+      if (handler) {
+        |  mov  DECODER->tmp_len, ARG3_64
+        |  mov  ARG1_64, CLOSURE
+        |  mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STARTSTR)
+        |  callp handler
+        |  check_ptr_ret
+        |  mov  ARG1_64, rax   // sub-closure
+        |  mov  ARG4_64, DECODER->tmp_len
+      } else {
+        |  mov  ARG1_64, CLOSURE
+        |  mov  ARG4_64, ARG3_64
+      }
+
+      handler = gethandler(h, f, UPB_HANDLER_STRING);
+      if (handler) {
+        |  mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STRING)
+        |  mov   ARG3_64, PTR
+        |  callp handler
+        // TODO: properly handle returns other than "n" (the whole string).
+        |  add   PTR, rax
+      } else {
+        |  add   PTR, ARG4_64
+      }

-      // Update PTR to point past end of string.
-      |  mov  rdi, rax
-      |  add  rdi, ARG3_64
-      |  mov  PTR, rdi
-
-      // Populate BYTEREGION appropriately.
-      |  sub  rax, DECODER->buf
-      |  add  rax, DECODER->bufstart_ofs  // = d->ptr - d->buf + d->bufstart_ofs
-      |  mov  BYTEREGION->start, rax
-      |  mov  BYTEREGION->discard, rax
-      |  add  rax, ARG3_64
-      |  mov  BYTEREGION->end, rax
-      |  mov  BYTEREGION->fetch, rax // Fast path ensures whole string is loaded
-      |  mov  ARG3_64, BYTEREGION
+      handler = gethandler(h, f, UPB_HANDLER_ENDSTR);
+      if (handler) {
+        |  mov    ARG1_64, CLOSURE
+        |  mov64  ARG2_64, gethandlerdata(h, f, UPB_HANDLER_ENDSTR)
+        |  callp  handler
+        |  check_bool_ret
+      }
      break;
+    }

    // Will dispatch callbacks and call submessage in a second.
    case UPB_TYPE(MESSAGE):
@ -402,85 +513,85 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan,
 }

 static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
-                                       upb_fhandlers *f) {
+                                       const upb_handlers *h,
+                                       const upb_fielddef *f) {
  // Call callbacks.  Specializing the append accessors didn't yield a speed
  // increase in benchmarks.
-  if (upb_issubmsgtype(f->type)) {
-    if (f->type == UPB_TYPE(MESSAGE)) {
+  if (upb_fielddef_issubmsg(f)) {
+    if (upb_fielddef_type(f) == UPB_TYPE(MESSAGE)) {
      |   mov   rsi, PTR
      |   sub   rsi, DECODER->buf
      |   add   rsi, ARG3_64   // = (d->ptr - d->buf) + delim_len
    } else {
-      assert(f->type == UPB_TYPE(GROUP));
+      assert(upb_fielddef_type(f) == UPB_TYPE(GROUP));
      |   mov   rsi, UPB_NONDELIMITED
    }
-    |  pushframe  f, rsi, false
+    |  pushframe  h, f, rsi, UPB_HANDLER_ENDSUBMSG

    // Call startsubmsg handler (if any).
-    if (f->startsubmsg) {
+    upb_func *startsubmsg = gethandler(h, f, UPB_HANDLER_STARTSUBMSG);
+    if (startsubmsg) {
      // upb_sflow_t startsubmsg(void *closure, upb_value fval)
      |  mov   ARG1_64, CLOSURE
-      |  loadfval f
-      |  callp f->startsubmsg
-      |  sethas CLOSURE, f->hasbit
-      |  mov  CLOSURE, rdx
-    } else {
-      |  sethas CLOSURE, f->hasbit
+      |  mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STARTSUBMSG);
+      |  callp startsubmsg
+      |  check_ptr_ret
+      |  mov  CLOSURE, rax
    }
-    |  mov   qword FRAME->closure, CLOSURE
-    // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
-    |  mov   DECODER->ptr, PTR
+    |  mov   qword SINKFRAME->closure, CLOSURE

-    const upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f);
-    |  call  =>sub_m->jit_startmsg_pclabel;
-    |  popframe upb_fhandlers_getmsg(f)
+    // TODO: have to decide what to do with NULL subhandlers (or whether to
+    // disallow them and require a full handlers tree to match the def tree).
+    const upb_handlers *sub_h = upb_handlers_getsubhandlers(h, f);
+    assert(sub_h);
+    |  call  =>upb_getpclabel(plan, sub_h, STARTMSG)
+    |  popframe

    // Call endsubmsg handler (if any).
-    if (f->endsubmsg) {
+    upb_func *endsubmsg = gethandler(h, f, UPB_HANDLER_ENDSUBMSG);
+    if (endsubmsg) {
      // upb_flow_t endsubmsg(void *closure, upb_value fval);
      |  mov   ARG1_64, CLOSURE
-      |  loadfval  f
-      |  callp f->endsubmsg
+      |  mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_ENDSUBMSG);
+      |  callp endsubmsg
+      |  check_bool_ret
    }
-    // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
-    |  mov   DECODER->ptr, PTR
-  } else {
+  } else if (!upb_fielddef_isstring(f)) {
    |  mov ARG1_64, CLOSURE
+    upb_handlertype_t handlertype = upb_handlers_getprimitivehandlertype(f);
+    upb_func *handler = gethandler(h, f, handlertype);
+    const upb_stdmsg_fval *fv = (void*)gethandlerdata(h, f, handlertype);
    // Test for callbacks we can specialize.
    // Can't switch() on function pointers.
-    if (f->value == &upb_stdmsg_setint64 ||
-        f->value == &upb_stdmsg_setuint64 ||
-        f->value == &upb_stdmsg_setptr ||
-        f->value == &upb_stdmsg_setdouble) {
-      const upb_fielddef *fd = upb_value_getfielddef(f->fval);
-      |  mov   [ARG1_64 + fd->offset], ARG3_64
-    } else if (f->value == &upb_stdmsg_setint32 ||
-               f->value == &upb_stdmsg_setuint32 ||
-               f->value == &upb_stdmsg_setfloat) {
-      const upb_fielddef *fd = upb_value_getfielddef(f->fval);
-      |  mov   [ARG1_64 + fd->offset], ARG3_32
-    } else if (f->value == &upb_stdmsg_setbool) {
-      const upb_fielddef *fd = upb_value_getfielddef(f->fval);
-      |  mov   [ARG1_64 + fd->offset], ARG3_8
-    } else if (f->value) {
+    if (handler == (void*)&upb_stdmsg_setint64 ||
+        handler == (void*)&upb_stdmsg_setuint64) {
+      |  mov   [ARG1_64 + fv->offset], ARG3_64
+      |  sethas CLOSURE, fv->hasbit
+    } else if (handler == (void*)&upb_stdmsg_setdouble) {
+      |  movsd  qword [ARG1_64 + fv->offset], XMMARG1
+      |  sethas CLOSURE, fv->hasbit
+    } else if (handler == (void*)&upb_stdmsg_setint32 ||
+               handler == (void*)&upb_stdmsg_setuint32) {
+      |  mov   [ARG1_64 + fv->offset], ARG3_32
+      |  sethas CLOSURE, fv->hasbit
+    } else if (handler == (void*)&upb_stdmsg_setfloat) {
+      |  movss  dword [ARG1_64 + fv->offset], XMMARG1
+      |  sethas CLOSURE, fv->hasbit
+    } else if (handler == (void*)&upb_stdmsg_setbool) {
+      |  mov   [ARG1_64 + fv->offset], ARG3_8
+      |  sethas CLOSURE, fv->hasbit
+    } else if (handler) {
      // Load closure and handler data into arg registers.
-      ||#ifndef NDEBUG
-      ||// Since upb_value carries type information in debug mode
-      ||// only, we need to pass the arguments slightly differently.
-      |    mov ARG4_64, ARG3_64
-      |    mov ARG5_32, upb_types[f->type].inmemory_type
-      ||#endif
-      |  loadfval f
-      |  callp  f->value
+      |  mov64  ARG2_64, gethandlerdata(h, f, handlertype);
+      |  callp  handler
+      |  check_bool_ret
    }
-    |  sethas CLOSURE, f->hasbit
-    // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
-    |  mov   DECODER->ptr, PTR
  }
 }

-static uint64_t upb_get_encoded_tag(upb_fhandlers *f) {
-  uint32_t tag = (f->number << 3) | upb_decoder_types[f->type].native_wire_type;
+static uint64_t upb_get_encoded_tag(const upb_fielddef *f) {
+  uint32_t tag = (upb_fielddef_number(f) << 3) |
+      upb_decoder_types[upb_fielddef_type(f)].native_wire_type;
  uint64_t encoded_tag = upb_vencode32(tag);
  // No tag should be greater than 5 bytes.
  assert(encoded_tag <= 0xffffffffff);
@ -488,118 +599,121 @@ static uint64_t upb_get_encoded_tag(upb_fhandlers *f) {
 }

 // PTR should point to the beginning of the tag.
-static void upb_decoderplan_jit_field(upb_decoderplan *plan, upb_mhandlers *m,
-                                      upb_fhandlers *f, upb_fhandlers *next_f) {
+static void upb_decoderplan_jit_field(upb_decoderplan *plan,
+                                      const upb_handlers *h,
+                                      const upb_fielddef *f,
+                                      const upb_fielddef *next_f) {
  uint64_t tag = upb_get_encoded_tag(f);
  uint64_t next_tag = next_f ? upb_get_encoded_tag(next_f) : 0;
+  int tag_size = upb_value_size(tag);

  // PC-label for the dispatch table.
  // We check the wire type (which must be loaded in edx) because the
  // table is keyed on field number, not type.
-  |=>f->jit_pclabel:
+  |=>upb_getpclabel(plan, f, FIELD):
  |  cmp  edx, (tag & 0x7)
  |  jne  ->exit_jit     // In the future: could be an unknown field or packed.
-  |=>f->jit_pclabel_notypecheck:
-  if (f->repeated) {
+  |=>upb_getpclabel(plan, f, FIELD_NO_TYPECHECK):
+  if (upb_fielddef_isseq(f)) {
    |  mov   rsi, FRAME->end_ofs
-    |  pushframe  f, rsi, true
-    if (f->startseq) {
+    |  pushframe  h, f, rsi, UPB_HANDLER_ENDSEQ
+    upb_func *startseq = gethandler(h, f, UPB_HANDLER_STARTSEQ);
+    if (startseq) {
      |  mov    ARG1_64, CLOSURE
-      |  loadfval f
-      |  callp  f->startseq
-      |  sethas CLOSURE, f->hasbit
-      |  mov    CLOSURE, rdx
-    } else {
-      |  sethas CLOSURE, f->hasbit
+      |  mov64  ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STARTSEQ);
+      |  callp  startseq
+      |  check_ptr_ret
+      |  mov    CLOSURE, rax
    }
-    |  mov   qword FRAME->closure, CLOSURE
+    |  mov   qword SINKFRAME->closure, CLOSURE
  }

  |1:  // Label for repeating this field.

-  int tag_size = upb_value_size(tag);
-  if (f->type == UPB_TYPE_ENDGROUP) {
-    |  add  PTR, tag_size
-    |  jmp  =>m->jit_endofmsg_pclabel
-    return;
-  }
-
-  upb_decoderplan_jit_decodefield(plan, f->type, tag_size);
-  upb_decoderplan_jit_callcb(plan, f);
+  upb_decoderplan_jit_decodefield(plan, upb_fielddef_type(f), tag_size, h, f);
+  upb_decoderplan_jit_callcb(plan, h, f);

  // Epilogue: load next tag, check for repeated field.
-  |  check_eob   m
+  |  checkpoint  h
  |  mov         rcx, qword [PTR]
-  if (f->repeated) {
+  if (upb_fielddef_isseq(f)) {
    |  checktag  tag
    |  je  <1
-    if (f->endseq) {
+    upb_func *endseq = gethandler(h, f, UPB_HANDLER_ENDSEQ);
+    if (endseq) {
      |  mov   ARG1_64, CLOSURE
-      |  loadfval f
-      |  callp f->endseq
+      |  mov64  ARG2_64, gethandlerdata(h, f, UPB_HANDLER_ENDSEQ);
+      |  callp endseq
    }
-    |  popframe m
+    |  popframe
+    // Load next tag again (popframe clobbered it).
+    |  mov         rcx, qword [PTR]
  }
+
  if (next_tag != 0) {
    |  checktag  next_tag
-    |  je  =>next_f->jit_pclabel_notypecheck
+    |  je  =>upb_getpclabel(plan, next_f, FIELD_NO_TYPECHECK)
  }

  // Fall back to dynamic dispatch.
-  |  dyndispatch  m
-  |1:
+  |  dyndispatch  h
 }

 static int upb_compare_uint32(const void *a, const void *b) {  // qsort() comparator for uint32_t keys.
-  // TODO: always put ENDGROUP at the end.
  return *(uint32_t*)a - *(uint32_t*)b;  // NOTE(review): unsigned difference converted to int; only orders correctly while the keys differ by less than 2^31.
 }

-static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) {
-  |=>m->jit_afterstartmsg_pclabel:
+static void upb_decoderplan_jit_msg(upb_decoderplan *plan,
+                                    const upb_handlers *h) {
+  |=>upb_getpclabel(plan, h, AFTER_STARTMSG):
  // There was a call to get here, so we need to align the stack.
  |  sub  rsp, 8
  |  jmp  >1

-  |=>m->jit_startmsg_pclabel:
+  |=>upb_getpclabel(plan, h, STARTMSG):
  // There was a call to get here, so we need to align the stack.
  |  sub  rsp, 8

  // Call startmsg handler (if any):
-  if (m->startmsg) {
+  upb_startmsg_handler *startmsg = upb_handlers_getstartmsg(h);
+  if (startmsg) {
    // upb_flow_t startmsg(void *closure);
-    |  mov   ARG1_64, FRAME->closure
-    |  callp m->startmsg
-    // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
+    |  mov   ARG1_64, SINKFRAME->closure
+    |  callp startmsg
+    |  check_bool_ret
  }

  |1:
-  |  setmsgend  m
-  |  check_eob   m
+  |  setmsgend
+  |  checkpoint h
  |  mov    ecx, dword [PTR]
-  |  dyndispatch_ m
+  |  dyndispatch_ h

  // --------- New code section (does not fall through) ------------------------

  // Emit code for parsing each field (dynamic dispatch contains pointers to
  // all of these).

-  // Create an ordering over the fields (inttable ordering is undefined).
-  int num_keys = upb_inttable_count(&m->fieldtab);
+  // Create an ordering over the fields in field number order.
+  // Parsing will theoretically be fastest if we emit code in the same
+  // order as field numbers are seen on-the-wire because of an optimization
+  // in the generated code that skips dynamic dispatch if the next field is
+  // as expected.
+  const upb_msgdef *md = upb_handlers_msgdef(h);
+  int num_keys = upb_msgdef_numfields(md);
  uint32_t *keys = malloc(num_keys * sizeof(*keys));
  int idx = 0;
-  upb_inttable_iter i;
-  upb_inttable_begin(&i, &m->fieldtab);
-  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
-    keys[idx++] = upb_inttable_iter_key(&i);
+  upb_msg_iter i;
+  for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) {
+    keys[idx++] = upb_fielddef_number(upb_msg_iter_field(&i));
  }
  qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32);

  for(int i = 0; i < num_keys; i++) {
-    upb_fhandlers *f = upb_mhandlers_lookup(m, keys[i]);
-    upb_fhandlers *next_f =
-        (i + 1 < num_keys) ? upb_mhandlers_lookup(m, keys[i + 1]) : NULL;
-    upb_decoderplan_jit_field(plan, m, f, next_f);
+    const upb_fielddef *f = upb_msgdef_itof(md, keys[i]);
+    const upb_fielddef *next_f =
+        (i + 1 < num_keys) ? upb_msgdef_itof(md, keys[i + 1]) : NULL;
+    upb_decoderplan_jit_field(plan, h, f, next_f);
  }

  free(keys);
@ -607,27 +721,19 @@ static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) {
  // --------- New code section (does not fall through) ------------------------

  // End-of-buf / end-of-message.
-  if (!m->is_group) {
-    // This case doesn't exist for groups, because there eob really means
-    // eob, so that case just exits the jit directly.
-    |=>m->jit_endofbuf_pclabel:
-    |  cmp  PTR, DECODER->delim_end
-    |  jb   ->exit_jit    // We are at eob, but not end-of-submsg.
-  }
+  // We hit a buffer limit; either we hit jit_end or end-of-submessage.
+  |=>upb_getpclabel(plan, h, ENDOFBUF):
+  |  cmp  PTR, DECODER->jit_end
+  |  jae  ->exit_jit

-  |=>m->jit_endofmsg_pclabel:
+  |=>upb_getpclabel(plan, h, ENDOFMSG):
  // We are at end-of-submsg: call endmsg handler (if any):
-  if (m->endmsg) {
+  upb_endmsg_handler *endmsg = upb_handlers_getendmsg(h);
+  if (endmsg) {
    // void endmsg(void *closure, upb_status *status) {
-    |  mov   ARG1_64, FRAME->closure
-    |  lea   ARG2_64, DECODER->dispatcher.status
-    |  callp m->endmsg
-  }
-
-  if (m->is_group) {
-    // Advance past the "end group" tag.
-    // TODO: Handle UPB_BREAK
-    |  mov   DECODER->ptr, PTR
+    |  mov   ARG1_64, SINKFRAME->closure
+    |  lea   ARG2_64, DECODER->sink.status
+    |  callp endmsg
  }

  // Counter previous alignment.
@ -657,9 +763,9 @@ static void upb_decoderplan_jit(upb_decoderplan *plan) {
  // Align stack.
  |  sub   rsp, 8
  |  mov   DECODER, ARG1_64
-  |  mov   FRAME, DECODER:ARG1_64->dispatcher.top
-  |  lea   BYTEREGION, DECODER:ARG1_64->str_byteregion
-  |  mov   CLOSURE, FRAME->closure
+  |  mov   FRAME, DECODER:ARG1_64->top
+  |  mov   SINKFRAME, DECODER:ARG1_64->sink.top
+  |  mov   CLOSURE, SINKFRAME->closure
  |  mov   PTR, DECODER->ptr

  // TODO: push return addresses for re-entry (will be necessary for multiple
@ -680,54 +786,65 @@ static void upb_decoderplan_jit(upb_decoderplan *plan) {
  |  leave
  |  ret

-  upb_handlers *h = plan->handlers;
-  for (int i = 0; i < h->msgs_len; i++)
-    upb_decoderplan_jit_msg(plan, h->msgs[i]);
-}
-
-static void upb_decoderplan_jit_assignfieldlabs(upb_fhandlers *f,
-                                                uint32_t *pclabel_count) {
-  f->jit_pclabel = (*pclabel_count)++;
-  f->jit_pclabel_notypecheck = (*pclabel_count)++;
-}
-
-static void upb_decoderplan_jit_assignmsglabs(upb_mhandlers *m,
-                                              uint32_t *pclabel_count) {
-  m->jit_startmsg_pclabel = (*pclabel_count)++;
-  m->jit_afterstartmsg_pclabel = (*pclabel_count)++;
-  m->jit_endofbuf_pclabel = (*pclabel_count)++;
-  m->jit_endofmsg_pclabel = (*pclabel_count)++;
-  m->jit_dyndispatch_pclabel = (*pclabel_count)++;
-  m->jit_unknownfield_pclabel = (*pclabel_count)++;
-  m->max_field_number = 0;
  upb_inttable_iter i;
-  upb_inttable_begin(&i, &m->fieldtab);
+  upb_inttable_begin(&i, &plan->msginfo);
  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
-    uint32_t key = upb_inttable_iter_key(&i);
-    m->max_field_number = UPB_MAX(m->max_field_number, key);
-    upb_fhandlers *f = upb_value_getptr(upb_inttable_iter_value(&i));
-    upb_decoderplan_jit_assignfieldlabs(f, pclabel_count);
+    const upb_handlers *h = (const upb_handlers*)upb_inttable_iter_key(&i);
+    upb_decoderplan_jit_msg(plan, h);
+  }
+}
+
+// Reserves pclabels for the handlers "h" and for each field of its msgdef,
+// creates the per-message upb_jitmsginfo (stored in plan->msginfo keyed by
+// "h"), and recurses into the sub-handlers of every submessage field.
+// Handlers that already have an entry in plan->pclabels are skipped.
+static void upb_decoderplan_jit_assignpclabels(upb_decoderplan *plan,
+                                               const upb_handlers *h) {
+  // Limit the DFS.
+  if (upb_inttable_lookupptr(&plan->pclabels, h)) return;
+
+  // Record the first label of this message; TOTAL_MSG_PCLABELS consecutive
+  // labels are reserved for it.
+  upb_inttable_insertptr(&plan->pclabels, h,
+                         upb_value_uint32(plan->pclabel_count));
+  plan->pclabel_count += TOTAL_MSG_PCLABELS;
+
+  upb_jitmsginfo *info = malloc(sizeof(*info));
+  info->max_field_number = 0;
+  upb_inttable_insertptr(&plan->msginfo, h, upb_value_ptr(info));
+
+  upb_msg_iter i;
+  upb_msg_begin(&i, upb_handlers_msgdef(h));
+  for(; !upb_msg_done(&i); upb_msg_next(&i)) {
+    const upb_fielddef *f = upb_msg_iter_field(&i);
+    info->max_field_number =
+        UPB_MAX(info->max_field_number, upb_fielddef_number(f));
+    // Each field likewise gets TOTAL_FIELD_PCLABELS consecutive labels.
+    upb_inttable_insertptr(&plan->pclabels, f,
+                           upb_value_uint32(plan->pclabel_count));
+    plan->pclabel_count += TOTAL_FIELD_PCLABELS;
+
+    // Discover the whole graph of handlers depth-first.  We will probably
+    // revise this later to be more explicit about the list of handlers that
+    // the plan should include.
+    if (upb_fielddef_issubmsg(f)) {
+      const upb_handlers *subh = upb_handlers_getsubhandlers(h, f);
+      if (subh) upb_decoderplan_jit_assignpclabels(plan, subh);
+    }
  }
  // TODO: support large field numbers by either using a hash table or
  // generating code for a binary search.  For now large field numbers
  // will just fall back to the table decoder.
-  m->max_field_number = UPB_MIN(m->max_field_number, 16000);
-  m->tablearray = malloc((m->max_field_number + 1) * sizeof(void*));
+  info->max_field_number = UPB_MIN(info->max_field_number, 16000);
+  // One dispatch slot per field number in [0, max_field_number].
+  info->tablearray = malloc((info->max_field_number + 1) * sizeof(void*));
 }

 static void upb_decoderplan_makejit(upb_decoderplan *plan) {
+  upb_inttable_init(&plan->msginfo, UPB_CTYPE_PTR);
  plan->debug_info = NULL;

  // Assign pclabels.
-  uint32_t pclabel_count = 0;
-  upb_handlers *h = plan->handlers;
-  for (int i = 0; i < h->msgs_len; i++)
-    upb_decoderplan_jit_assignmsglabs(h->msgs[i], &pclabel_count);
+  plan->pclabel_count = 0;
+  upb_inttable_init(&plan->pclabels, UPB_CTYPE_UINT32);
+  upb_decoderplan_jit_assignpclabels(plan, plan->handlers);

  void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals));
  dasm_init(plan, 1);
  dasm_setupglobal(plan, globals, UPB_JIT_GLOBAL__MAX);
-  dasm_growpc(plan, pclabel_count);
+  dasm_growpc(plan, plan->pclabel_count);
  dasm_setup(plan, upb_jit_actionlist);

  upb_decoderplan_jit(plan);
@ -744,38 +861,53 @@ static void upb_decoderplan_makejit(upb_decoderplan *plan) {
  dasm_encode(plan, plan->jit_code);

  // Create dispatch tables.
-  for (int i = 0; i < h->msgs_len; i++) {
-    upb_mhandlers *m = h->msgs[i];
+  upb_inttable_iter i;
+  upb_inttable_begin(&i, &plan->msginfo);
+  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+    const upb_handlers *h = (const upb_handlers*)upb_inttable_iter_key(&i);
+    upb_jitmsginfo *mi = upb_getmsginfo(plan, h);
    // We jump to after the startmsg handler since it is called before entering
    // the JIT (either by upb_decoder or by a previous call to the JIT).
-    m->jit_func =
-        plan->jit_code + dasm_getpclabel(plan, m->jit_afterstartmsg_pclabel);
-    for (uint32_t j = 0; j <= m->max_field_number; j++) {
-      upb_fhandlers *f = upb_mhandlers_lookup(m, j);
+    mi->jit_func = plan->jit_code +
+        dasm_getpclabel(plan, upb_getpclabel(plan, h, AFTER_STARTMSG));
+    for (uint32_t j = 0; j <= mi->max_field_number; j++) {
+      const upb_fielddef *f = upb_msgdef_itof(upb_handlers_msgdef(h), j);
      if (f) {
-        m->tablearray[j] =
-            plan->jit_code + dasm_getpclabel(plan, f->jit_pclabel);
+        mi->tablearray[j] = plan->jit_code +
+            dasm_getpclabel(plan, upb_getpclabel(plan, f, FIELD));
      } else {
        // TODO: extend the JIT to handle unknown fields.
        // For the moment we exit the JIT for any unknown field.
-        m->tablearray[j] = globals[UPB_JIT_GLOBAL_exit_jit];
+        mi->tablearray[j] = globals[UPB_JIT_GLOBAL_exit_jit];
      }
    }
  }

+  upb_inttable_uninit(&plan->pclabels);
+
  dasm_free(plan);
  free(globals);

  mprotect(plan->jit_code, plan->jit_size, PROT_EXEC | PROT_READ);

+#ifndef NDEBUG
  // View with: objdump -M intel -D -b binary -mi386 -Mx86-64 /tmp/machine-code
  // Or: ndisasm -b 64 /tmp/machine-code
  FILE *f = fopen("/tmp/machine-code", "wb");
  fwrite(plan->jit_code, plan->jit_size, 1, f);
  fclose(f);
+#endif
 }

 static void upb_decoderplan_freejit(upb_decoderplan *plan) {
+  upb_inttable_iter i;
+  upb_inttable_begin(&i, &plan->msginfo);
+  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+    upb_jitmsginfo *mi = upb_value_getptr(upb_inttable_iter_value(&i));
+    free(mi->tablearray);
+    free(mi);
+  }
+  upb_inttable_uninit(&plan->msginfo);
  munmap(plan->jit_code, plan->jit_size);
  free(plan->debug_info);
  // TODO: unregister
@ -783,7 +915,7 @@ static void upb_decoderplan_freejit(upb_decoderplan *plan) {

 static void upb_decoder_enterjit(upb_decoder *d) {
  if (d->plan->jit_code &&
-      d->dispatcher.top == d->dispatcher.stack &&
+      d->sink.top == d->sink.stack &&
      d->ptr && d->ptr < d->jit_end) {
 #ifndef NDEBUG
    register uint64_t rbx asm ("rbx") = 11;
@ -795,7 +927,9 @@ static void upb_decoder_enterjit(upb_decoder *d) {
    // Decodes as many fields as possible, updating d->ptr appropriately,
    // before falling through to the slow(er) path.
    void (*upb_jit_decode)(upb_decoder *d, void*) = (void*)d->plan->jit_code;
-    upb_jit_decode(d, d->plan->handlers->msgs[d->msg_offset]->jit_func);
+    upb_jitmsginfo *mi = upb_getmsginfo(d->plan, d->plan->handlers);
+    assert(mi);
+    upb_jit_decode(d, mi->jit_func);
    assert(d->ptr <= d->end);

    // Test that callee-save registers were properly restored.
--- a/upb/pb/glue.c
+++ b/upb/pb/glue.c
@ -5,10 +5,14 @@
 * Author: Josh Haberman <jhaberman@gmail.com>
 */

+#include "upb/pb/glue.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 #include "upb/bytestream.h"
 #include "upb/descriptor/reader.h"
 #include "upb/pb/decoder.h"
-#include "upb/pb/glue.h"

 upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
                                        void *owner, upb_status *status) {
@ -16,16 +20,14 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
  upb_stringsrc_init(&strsrc);
  upb_stringsrc_reset(&strsrc, str, len);

-  upb_handlers *h = upb_handlers_new();
-  upb_descreader_reghandlers(h);
-
+  const upb_handlers *h = upb_descreader_newhandlers(&h);
  upb_decoderplan *p = upb_decoderplan_new(h, false);
  upb_decoder d;
  upb_decoder_init(&d);
-  upb_handlers_unref(h);
+  upb_handlers_unref(h, &h);
  upb_descreader r;
  upb_descreader_init(&r);
-  upb_decoder_resetplan(&d, p, 0);
+  upb_decoder_resetplan(&d, p);
  upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), &r);

  upb_success_t ret = upb_decoder_decode(&d);
--- a/upb/pb/glue.h
+++ b/upb/pb/glue.h
@ -27,8 +27,7 @@
 #define UPB_GLUE_H

 #include <stdbool.h>
-#include "upb/upb.h"
-#include "upb/def.h"
+#include "upb/symtab.h"

 #ifdef __cplusplus
 extern "C" {
@ -55,6 +54,29 @@ char *upb_readfile(const char *filename, size_t *len);

 #ifdef __cplusplus
 }  /* extern "C" */
+
+namespace upb {
+
+// All routines that load descriptors expect the descriptor to be a
+// FileDescriptorSet.
+
+// C++ spelling of upb_load_descriptor_file_into_symtab(); arguments and the
+// return value are forwarded unchanged.
+inline bool LoadDescriptorFileIntoSymtab(SymbolTable* s, const char *fname,
+                                         Status* status) {
+  return upb_load_descriptor_file_into_symtab(s, fname, status);
+}
+
+// C++ spelling of upb_load_descriptor_into_symtab() for a (pointer, length)
+// buffer; arguments and the return value are forwarded unchanged.
+inline bool LoadDescriptorIntoSymtab(SymbolTable* s, const char* str,
+                                     size_t len, Status* status) {
+  return upb_load_descriptor_into_symtab(s, str, len, status);
+}
+
+// Templated so it can accept both string and std::string.
+// "desc" only needs to provide c_str() and size().
+template <typename T>
+bool LoadDescriptorIntoSymtab(SymbolTable* s, const T& desc, Status* status) {
+  return upb_load_descriptor_into_symtab(s, desc.c_str(), desc.size(), status);
+}
+
+}  // namespace upb
+
 #endif

 #endif
--- a/upb/pb/textprinter.c
+++ b/upb/pb/textprinter.c
@ -5,11 +5,14 @@
 * Author: Josh Haberman <jhaberman@gmail.com>
 */

+#include "upb/pb/textprinter.h"
+
 #include <ctype.h>
 #include <float.h>
 #include <inttypes.h>
+#include <stdio.h>
 #include <stdlib.h>
-#include "upb/pb/textprinter.h"
+#include <string.h>

 struct _upb_textprinter {
  upb_bytesink *sink;
@ -20,7 +23,7 @@ struct _upb_textprinter {

 #define CHECK(x) if ((x) < 0) goto err;

-static int upb_textprinter_indent(upb_textprinter *p) {
+static int indent(upb_textprinter *p) {
  if (!p->single_line)
    CHECK(upb_bytesink_putrepeated(p->sink, ' ', p->indent_depth*2));
  return 0;
@ -28,37 +31,32 @@ err:
  return -1;
 }

-static int upb_textprinter_endfield(upb_textprinter *p) {
+// Terminates a field: writes a space in single-line mode, a newline otherwise.
+// Returns 0 on success, -1 if the sink write fails.
+static int endfield(upb_textprinter *p) {
  CHECK(upb_bytesink_putc(p->sink, p->single_line ? ' ' : '\n'));
  return 0;
 err:
  return -1;
 }

-static int upb_textprinter_putescaped(upb_textprinter *p,
-                                      const upb_byteregion *bytes,
-                                      bool preserve_utf8) {
+// Writes the "len" bytes at "buf" to the sink with C-style escaping.  Bytes
+// >= 0x80 are passed through unescaped when "preserve_utf8" is true.  Output
+// is staged in a stack buffer that is flushed to the sink whenever it is
+// nearly full and once more at the end.
+// Returns 0 on success, -1 if a sink write fails.
+static int putescaped(upb_textprinter *p, const char *buf, size_t len,
+                      bool preserve_utf8) {
  // Based on CEscapeInternal() from Google's protobuf release.
-  // TODO; we could read directly from a bytesrc's buffer instead.
-  // TODO; we could write byteregions to the sink when possible.
-  char dstbuf[512], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
-  char *buf = malloc(upb_byteregion_len(bytes)), *src = buf;
-  char *end = src + upb_byteregion_len(bytes);
-  upb_byteregion_copyall(bytes, buf);
+  char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
+  const char *end = buf + len;

  // I think hex is prettier and more useful, but proto2 uses octal; should
  // investigate whether it can parse hex also.
  const bool use_hex = false;
  bool last_hex_escape = false; // true if last output char was \xNN

-  for (; src < end; src++) {
+  for (; buf < end; buf++) {
-    if (dstend - dst < 4) {
+    // The longest escape is 4 characters, but sprintf() also stores a
+    // terminating NUL, so 5 bytes of headroom are required.
+    if (dstend - dst < 5) {
      CHECK(upb_bytesink_write(p->sink, dstbuf, dst - dstbuf));
      dst = dstbuf;
    }

    bool is_hex_escape = false;
-    switch (*src) {
+    switch (*buf) {
      case '\n': *(dst++) = '\\'; *(dst++) = 'n';  break;
      case '\r': *(dst++) = '\\'; *(dst++) = 'r';  break;
      case '\t': *(dst++) = '\\'; *(dst++) = 't';  break;
@ -66,123 +64,123 @@ static int upb_textprinter_putescaped(upb_textprinter *p,
      case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
      case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
      default:
-        // Note that if we emit \xNN and the src character after that is a hex
+        // Note that if we emit \xNN and the buf character after that is a hex
        // digit then that digit must be escaped too to prevent it being
        // interpreted as part of the character code by C.
-        if ((!preserve_utf8 || (uint8_t)*src < 0x80) &&
-            (!isprint(*src) || (last_hex_escape && isxdigit(*src)))) {
-          sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (uint8_t)*src);
+        // The <ctype.h> functions require a value representable as unsigned
+        // char, so the (possibly negative) char is converted first.
+        if ((!preserve_utf8 || (uint8_t)*buf < 0x80) &&
+            (!isprint((uint8_t)*buf) ||
+             (last_hex_escape && isxdigit((uint8_t)*buf)))) {
+          sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (uint8_t)*buf);
          is_hex_escape = use_hex;
          dst += 4;
        } else {
-          *(dst++) = *src; break;
+          *(dst++) = *buf; break;
        }
    }
    last_hex_escape = is_hex_escape;
  }
  // Flush remaining data.
-  CHECK(upb_bytesink_write(p->sink, dst, dst - dstbuf));
-  free(buf);
+  // The staged bytes start at dstbuf; dst only marks their end.
+  CHECK(upb_bytesink_write(p->sink, dstbuf, dst - dstbuf));
  return 0;
 err:
-  free(buf);
  return -1;
 }

-#define TYPE(member, fmt) \
-  static upb_flow_t upb_textprinter_put ## member(void *_p, upb_value fval,  \
-                                                  upb_value val) {           \
+// Defines put<name>(): a value handler that prints "<field name>: <value>"
+// using the printf format "fmt", preceded by indentation and followed by the
+// field terminator.  "fval" is used as the field's upb_fielddef.  The generated
+// function returns false as soon as a write to the sink fails.
+#define TYPE(name, ctype, fmt) \
+  static bool put ## name(void *_p, void *fval, ctype val) {                 \
    upb_textprinter *p = _p;                                                 \
-    const upb_fielddef *f = upb_value_getfielddef(fval);                     \
-    uint64_t start_ofs = upb_bytesink_getoffset(p->sink);                    \
-    CHECK(upb_textprinter_indent(p));                                        \
+    const upb_fielddef *f = fval;                                            \
+    CHECK(indent(p));                                                        \
    CHECK(upb_bytesink_writestr(p->sink, upb_fielddef_name(f)));             \
    CHECK(upb_bytesink_writestr(p->sink, ": "));                             \
-    CHECK(upb_bytesink_printf(p->sink, fmt, upb_value_get ## member(val)));  \
-    CHECK(upb_textprinter_endfield(p));                                      \
-    return UPB_CONTINUE;                                                     \
+    CHECK(upb_bytesink_printf(p->sink, fmt, val));                           \
+    CHECK(endfield(p));                                                      \
+    return true;                                                             \
  err:                                                                       \
-    upb_bytesink_rewind(p->sink, start_ofs);                                 \
-    return UPB_BREAK;                                                        \
+    return false;                                                            \
 }

 #define STRINGIFY_HELPER(x) #x
 #define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x)

-TYPE(double, "%." STRINGIFY_MACROVAL(DBL_DIG) "g")
-TYPE(float,  "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
-TYPE(int64,  "%" PRId64)
-TYPE(uint64, "%" PRIu64)
-TYPE(int32,  "%" PRId32)
-TYPE(uint32, "%" PRIu32);
-TYPE(bool,   "%hhu");
+TYPE(int32,  int32_t,  "%" PRId32)
+TYPE(int64,  int64_t,  "%" PRId64)
+TYPE(uint32, uint32_t, "%" PRIu32);
+TYPE(uint64, uint64_t, "%" PRIu64)
+TYPE(float,  float,    "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
+TYPE(double, double,   "%." STRINGIFY_MACROVAL(DBL_DIG) "g")
+TYPE(bool,   bool,     "%hhu");

 // Output a symbolic value from the enum if found, else just print as int32.
-static upb_flow_t upb_textprinter_putenum(void *_p, upb_value fval,
-                                          upb_value val) {
+// The symbolic form is framed like every other scalar: indentation, the field
+// name, ": ", the label, then the field terminator.  "fval" is the field's
+// upb_fielddef.  Returns false if any write to the sink fails.
+static bool putenum(void *_p, void *fval, int32_t val) {

  upb_textprinter *p = _p;
-  uint64_t start_ofs = upb_bytesink_getoffset(p->sink);
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  const upb_enumdef *enum_def =
-      upb_downcast_enumdef_const(upb_fielddef_subdef(f));
-  const char *label = upb_enumdef_iton(enum_def, upb_value_getint32(val));
+  const upb_fielddef *f = fval;
+  const upb_enumdef *enum_def = upb_downcast_enumdef(upb_fielddef_subdef(f));
+  const char *label = upb_enumdef_iton(enum_def, val);
  if (label) {
-    CHECK(upb_bytesink_writestr(p->sink, label));
+    CHECK(indent(p));
+    CHECK(upb_bytesink_writestr(p->sink, upb_fielddef_name(f)));
+    CHECK(upb_bytesink_writestr(p->sink, ": "));
+    CHECK(upb_bytesink_writestr(p->sink, label));
+    CHECK(endfield(p));
  } else {
-    CHECK(upb_textprinter_putint32(_p, fval, val));
+    // putint32() reports failure as false, which CHECK() (a "< 0" test) would
+    // never detect, so test it explicitly.
+    if (!putint32(_p, fval, val)) goto err;
  }
-  return UPB_CONTINUE;
+  return true;
 err:
-  upb_bytesink_rewind(p->sink, start_ofs);
-  return UPB_BREAK;
+  return false;
 }

-static upb_flow_t upb_textprinter_putstr(void *_p, upb_value fval,
-                                         upb_value val) {
+// Start-of-string handler: writes the indentation, the field name, ": " and
+// the opening quote.  "fval" is the field's upb_fielddef.  Returns the printer
+// (used as the closure for the string data) or UPB_BREAK if a write fails.
+static void *startstr(void *_p, void *fval, size_t size_hint) {
+  UPB_UNUSED(size_hint);
  upb_textprinter *p = _p;
-  uint64_t start_ofs = upb_bytesink_getoffset(p->sink);
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  CHECK(upb_bytesink_putc(p->sink, '"'));
-  CHECK(upb_textprinter_putescaped(p, upb_value_getbyteregion(val),
-                                   f->type == UPB_TYPE(STRING)));
+  const upb_fielddef *f = fval;
+  CHECK(indent(p));
+  CHECK(upb_bytesink_writestr(p->sink, upb_fielddef_name(f)));
+  CHECK(upb_bytesink_writestr(p->sink, ": "));
  CHECK(upb_bytesink_putc(p->sink, '"'));
-  return UPB_CONTINUE;
+  return p;
 err:
-  upb_bytesink_rewind(p->sink, start_ofs);
  return UPB_BREAK;
 }

-static upb_sflow_t upb_textprinter_startsubmsg(void *_p, upb_value fval) {
+// End-of-string handler: writes the closing quote followed by the field
+// terminator.  Returns false if a write to the sink fails.
+static bool endstr(void *_p, void *fval) {
+  UPB_UNUSED(fval);
+  upb_textprinter *p = _p;
+  CHECK(upb_bytesink_putc(p->sink, '"'));
+  CHECK(endfield(p));
+  return true;
+err:
+  return false;
+}
+
+// String-data handler: writes one chunk of string bytes in escaped form.
+// Bytes >= 0x80 are kept as-is only for UPB_TYPE(STRING) fields.
+// Returns "len" on success and 0 if writing to the sink fails.
+static size_t putstr(void *_p, void *fval, const char *buf, size_t len) {
  upb_textprinter *p = _p;
-  uint64_t start_ofs = upb_bytesink_getoffset(p->sink);
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  CHECK(upb_textprinter_indent(p));
+  const upb_fielddef *f = fval;
+  CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE(STRING)));
+  return len;
+err:
+  return 0;
+}
+
+// Start-of-submessage handler: writes the indentation and "<field name> {"
+// (plus a newline unless in single-line mode), then increases the indentation
+// depth.  Returns the same closure it was given, or UPB_BREAK if a write
+// fails.
+static void *startsubmsg(void *_p, void *fval) {
+  upb_textprinter *p = _p;
+  const upb_fielddef *f = fval;
+  CHECK(indent(p));
  CHECK(upb_bytesink_printf(p->sink, "%s {", upb_fielddef_name(f)));
  if (!p->single_line)
    CHECK(upb_bytesink_putc(p->sink, '\n'));
  p->indent_depth++;
-  return UPB_CONTINUE_WITH(_p);
+  return _p;
 err:
-  upb_bytesink_rewind(p->sink, start_ofs);
-  return UPB_SBREAK;
+  return UPB_BREAK;
 }

-static upb_flow_t upb_textprinter_endsubmsg(void *_p, upb_value fval) {
-  (void)fval;
+// End-of-submessage handler: decreases the indentation depth, then writes the
+// indentation, the closing '}' and the field terminator.  Returns false if a
+// write to the sink fails (the depth has already been decremented by then).
+static bool endsubmsg(void *_p, void *fval) {
+  UPB_UNUSED(fval);
  upb_textprinter *p = _p;
-  uint64_t start_ofs = upb_bytesink_getoffset(p->sink);
  p->indent_depth--;
-  CHECK(upb_textprinter_indent(p));
+  CHECK(indent(p));
  CHECK(upb_bytesink_putc(p->sink, '}'));
-  CHECK(upb_textprinter_endfield(p));
-  return UPB_CONTINUE;
+  CHECK(endfield(p));
+  return true;
 err:
-  upb_bytesink_rewind(p->sink, start_ofs);
-  return UPB_BREAK;
+  return false;
 }

-upb_textprinter *upb_textprinter_new(void) {
+upb_textprinter *upb_textprinter_new() {
  upb_textprinter *p = malloc(sizeof(*p));
  return p;
 }
@ -196,22 +194,61 @@ void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink,
  p->indent_depth = 0;
 }

-static void upb_textprinter_onfreg(void *c, upb_fhandlers *fh, const upb_fielddef *f) {
+// Handlers-registration callback: for every field of the message behind "h",
+// installs the text-printing handler(s) that match the field's type.  The
+// field's upb_fielddef is passed along with each handler so that the handler
+// receives it as its "fval" argument.  "c" is unused.
+static void onmreg(void *c, upb_handlers *h) {
  (void)c;
-  upb_fhandlers_setstartsubmsg(fh, &upb_textprinter_startsubmsg);
-  upb_fhandlers_setendsubmsg(fh, &upb_textprinter_endsubmsg);
-#define F(type) &upb_textprinter_put ## type
-  static upb_value_handler *fptrs[] = {NULL, F(double), F(float), F(int64),
-      F(uint64), F(int32), F(uint64), F(uint32), F(bool), F(str),
-      NULL, NULL, F(str), F(uint32), F(enum), F(int32),
-      F(int64), F(int32), F(int64)};
-  upb_fhandlers_setvalue(fh, fptrs[f->type]);
-  upb_value fval;
-  upb_value_setfielddef(&fval, f);
-  upb_fhandlers_setfval(fh, fval);
+  const upb_msgdef *m = upb_handlers_msgdef(h);
+  upb_msg_iter i;
+  for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
+    upb_fielddef *f = upb_msg_iter_field(&i);
+    switch (upb_fielddef_type(f)) {
+      case UPB_TYPE_INT32:
+      case UPB_TYPE_SINT32:
+      case UPB_TYPE_SFIXED32:
+        upb_handlers_setint32(h, f, putint32, f, NULL);
+        break;
+      case UPB_TYPE_SINT64:
+      case UPB_TYPE_SFIXED64:
+      case UPB_TYPE_INT64:
+        upb_handlers_setint64(h, f, putint64, f, NULL);
+        break;
+      case UPB_TYPE_UINT32:
+      case UPB_TYPE_FIXED32:
+        upb_handlers_setuint32(h, f, putuint32, f, NULL);
+        break;
+      case UPB_TYPE_UINT64:
+      case UPB_TYPE_FIXED64:
+        upb_handlers_setuint64(h, f, putuint64, f, NULL);
+        break;
+      case UPB_TYPE_FLOAT:
+        upb_handlers_setfloat(h, f, putfloat, f, NULL);
+        break;
+      case UPB_TYPE_DOUBLE:
+        upb_handlers_setdouble(h, f, putdouble, f, NULL);
+        break;
+      case UPB_TYPE_BOOL:
+        upb_handlers_setbool(h, f, putbool, f, NULL);
+        break;
+      case UPB_TYPE_STRING:
+      case UPB_TYPE_BYTES:
+        upb_handlers_setstartstr(h, f, startstr, f, NULL);
+        upb_handlers_setstring(h, f, putstr, f, NULL);
+        upb_handlers_setendstr(h, f, endstr, f, NULL);
+        break;
+      case UPB_TYPE_GROUP:
+      case UPB_TYPE_MESSAGE:
+        upb_handlers_setstartsubmsg(h, f, &startsubmsg, f, NULL);
+        upb_handlers_setendsubmsg(h, f, &endsubmsg, f, NULL);
+        break;
+      case UPB_TYPE_ENUM:
+        upb_handlers_setint32(h, f, putenum, f, NULL);
+        // Must not fall through: the default case asserts on unknown types.
+        break;
+      default:
+        assert(false);
+        break;
+    }
+  }
 }

-upb_mhandlers *upb_textprinter_reghandlers(upb_handlers *h, const upb_msgdef *m) {
-  return upb_handlers_regmsgdef(
-      h, m, NULL, &upb_textprinter_onfreg, NULL);
+// Returns text-printing handlers for message type "m", obtained from
+// upb_handlers_newfrozen() with onmreg() as the registration callback.
+// "owner" is passed through to upb_handlers_newfrozen() unchanged.
+const upb_handlers *upb_textprinter_newhandlers(const void *owner,
+                                                const upb_msgdef *m) {
+  return upb_handlers_newfrozen(m, owner, &onmreg, NULL);
 }
--- a/upb/pb/textprinter.h
+++ b/upb/pb/textprinter.h
@ -18,11 +18,12 @@ extern "C" {
 struct _upb_textprinter;
 typedef struct _upb_textprinter upb_textprinter;

-upb_textprinter *upb_textprinter_new(void);
+upb_textprinter *upb_textprinter_new();
 void upb_textprinter_free(upb_textprinter *p);
 void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink,
                           bool single_line);
-upb_mhandlers *upb_textprinter_reghandlers(upb_handlers *h, const upb_msgdef *m);
+const upb_handlers *upb_textprinter_newhandlers(const void *owner,
+                                                const upb_msgdef *m);

 #ifdef __cplusplus
 }  /* extern "C" */
--- a/upb/pb/varint.c
+++ b/upb/pb/varint.c
@ -7,16 +7,64 @@

 #include "upb/pb/varint.h"

+// A basic branch-based decoder, uses 32-bit values to get good performance
+// on 32-bit architectures (but performs well on 64-bits also).
+// This scheme comes from the original Google Protobuf implementation (proto2).
+//
+// On entry r.p points at the next undecoded byte and r.val holds the bits
+// decoded so far; decoding resumes at bit 14.  Up to eight more bytes are
+// consumed.  Returns the updated {p, val}, or {NULL, 0} if none of those
+// eight bytes terminates the varint.
+upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) {
+  upb_decoderet err = {NULL, 0};
+  const char *p = r.p;
+  uint32_t low = (uint32_t)r.val;
+  uint32_t high = 0;
+  uint32_t b;
+  b = *(p++); low  |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
+  b = *(p++); low  |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
+  // This byte straddles the two 32-bit halves of the result.
+  b = *(p++); low  |= (b & 0x7fU) << 28;
+              high  = (b & 0x7fU) >>  4; if (!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7fU) <<  3; if (!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done;
+  return err;
+
+done:
+  r.val = ((uint64_t)high << 32) | low;
+  r.p = p;
+  return r;
+}
+
+// Like the previous, but uses 64-bit values.
+//
+// On entry r.p points at the next undecoded byte and r.val holds the bits
+// decoded so far; decoding resumes at bit 14.  Up to eight more bytes are
+// consumed.  Returns the updated {p, val}, or {NULL, 0} if none of those
+// eight bytes terminates the varint.
+upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) {
+  const char *p = r.p;
+  uint64_t val = r.val;
+  uint64_t b;
+  upb_decoderet err = {NULL, 0};
+  b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done;
+  return err;
+
+done:
+  r.val = val;
+  r.p = p;
+  return r;
+}
+
 // Given an encoded varint v, returns an integer with a single bit set that
 // indicates the end of the varint.  Subtracting one from this value will
 // yield a mask that leaves only bits that are part of the varint.  Returns
 // 0 if the varint is unterminated.
-INLINE uint64_t upb_get_vstopbit(uint64_t v) {
+static uint64_t upb_get_vstopbit(uint64_t v) {
  uint64_t cbits = v | 0x7f7f7f7f7f7f7f7fULL;
  return ~cbits & (cbits+1);
 }
-INLINE uint64_t upb_get_vmask(uint64_t v) { return upb_get_vstopbit(v) - 1; }

+// A branchless decoder.  Credit to Pascal Massimino for the bit-twiddling.
 upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
  uint64_t b;
  memcpy(&b, r.p, sizeof(b));
@ -35,14 +83,15 @@ upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
  return my_r;
 }

+// A branchless decoder.  Credit to Daniel Wright for the bit-twiddling.
 upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
  uint64_t b;
  memcpy(&b, r.p, sizeof(b));
  uint64_t stop_bit = upb_get_vstopbit(b);
  b &= (stop_bit - 1);
-  b = ((b & 0x7f007f007f007f00) >> 1) | (b & 0x007f007f007f007f);
-  b = ((b & 0xffff0000ffff0000) >> 2) | (b & 0x0000ffff0000ffff);
-  b = ((b & 0xffffffff00000000) >> 4) | (b & 0x00000000ffffffff);
+  b = ((b & 0x7f007f007f007f00ULL) >> 1) | (b & 0x007f007f007f007fULL);
+  b = ((b & 0xffff0000ffff0000ULL) >> 2) | (b & 0x0000ffff0000ffffULL);
+  b = ((b & 0xffffffff00000000ULL) >> 4) | (b & 0x00000000ffffffffULL);
  if (stop_bit == 0) {
    // Error: unterminated varint.
    upb_decoderet err_r = {(void*)0, 0};
--- a/upb/pb/varint.h
+++ b/upb/pb/varint.h
@ -49,71 +49,32 @@ typedef struct {
  uint64_t val;
 } upb_decoderet;

-// A basic branch-based decoder, uses 32-bit values to get good performance
-// on 32-bit architectures (but performs well on 64-bits also).
-INLINE upb_decoderet upb_vdecode_branch32(const char *p) {
-  upb_decoderet r = {NULL, 0};
-  uint32_t low, high = 0;
-  uint32_t b;
-  b = *(p++); low   = (b & 0x7f)      ; if(!(b & 0x80)) goto done;
-  b = *(p++); low  |= (b & 0x7f) <<  7; if(!(b & 0x80)) goto done;
-  b = *(p++); low  |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
-  b = *(p++); low  |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
-  b = *(p++); low  |= (b & 0x7f) << 28;
-              high  = (b & 0x7f) >>  4; if(!(b & 0x80)) goto done;
-  b = *(p++); high |= (b & 0x7f) <<  3; if(!(b & 0x80)) goto done;
-  b = *(p++); high |= (b & 0x7f) << 10; if(!(b & 0x80)) goto done;
-  b = *(p++); high |= (b & 0x7f) << 17; if(!(b & 0x80)) goto done;
-  b = *(p++); high |= (b & 0x7f) << 24; if(!(b & 0x80)) goto done;
-  b = *(p++); high |= (b & 0x7f) << 31; if(!(b & 0x80)) goto done;
-  return r;
-
-done:
-  r.val = ((uint64_t)high << 32) | low;
-  r.p = p;
-  return r;
-}
-
-// Like the previous, but uses 64-bit values.
-INLINE upb_decoderet upb_vdecode_branch64(const char *p) {
-  uint64_t val;
-  uint64_t b;
-  upb_decoderet r = {NULL, 0};
-  b = *(p++); val  = (b & 0x7f)      ; if(!(b & 0x80)) goto done;
-  b = *(p++); val |= (b & 0x7f) <<  7; if(!(b & 0x80)) goto done;
-  b = *(p++); val |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
-  b = *(p++); val |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
-  b = *(p++); val |= (b & 0x7f) << 28; if(!(b & 0x80)) goto done;
-  b = *(p++); val |= (b & 0x7f) << 35; if(!(b & 0x80)) goto done;
-  b = *(p++); val |= (b & 0x7f) << 42; if(!(b & 0x80)) goto done;
-  b = *(p++); val |= (b & 0x7f) << 49; if(!(b & 0x80)) goto done;
-  b = *(p++); val |= (b & 0x7f) << 56; if(!(b & 0x80)) goto done;
-  b = *(p++); val |= (b & 0x7f) << 63; if(!(b & 0x80)) goto done;
-  return r;
-
-done:
-  r.val = val;
-  r.p = p;
-  return r;
-}
-
-// Decodes a varint of at most 8 bytes without branching (except for error).
+// Four functions for decoding a varint of at most eight bytes.  They are all
+// functionally identical, but are implemented in different ways and likely have
+// different performance profiles.  We keep them around for performance testing.
+//
+// Note that these functions may not read byte-by-byte, so they must not be used
+// unless there are at least eight bytes left in the buffer!
+upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r);
+upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r);
 upb_decoderet upb_vdecode_max8_wright(upb_decoderet r);
-
-// Another implementation of the previous.
 upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r);

 // Template for a function that checks the first two bytes with branching
-// and dispatches 2-10 bytes with a separate function.
-#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function)                \
-INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) {           \
-  uint8_t *p = (uint8_t*)_p;                                                 \
-  if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7f}; return r; } \
-  upb_decoderet r = {_p + 2, (*p & 0x7f) | ((*(p + 1) & 0x7f) << 7)};        \
-  if ((*(p + 1) & 0x80) == 0) return r;                                      \
-  return decode_max8_function(r);                                            \
+// and dispatches 2-10 bytes with a separate function.  Note that this may read
+// up to 10 bytes, so it must not be used unless there are at least ten bytes
+// left in the buffer!
+#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function)                  \
+INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) {             \
+  uint8_t *p = (uint8_t*)_p;                                                   \
+  if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7fU}; return r; }  \
+  upb_decoderet r = {_p + 2, (*p & 0x7fU) | ((*(p + 1) & 0x7fU) << 7)};        \
+  if ((*(p + 1) & 0x80) == 0) return r;                                        \
+  return decode_max8_function(r);                                              \
 }

+UPB_VARINT_DECODER_CHECK2(branch32, upb_vdecode_max8_branch32);
+UPB_VARINT_DECODER_CHECK2(branch64, upb_vdecode_max8_branch64);
 UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright);
 UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino);
 #undef UPB_VARINT_DECODER_CHECK2
@ -121,11 +82,10 @@ UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino);
 // Our canonical functions for decoding varints, based on the currently
 // favored best-performing implementations.
 INLINE upb_decoderet upb_vdecode_fast(const char *p) {
-  // Use nobranch2 on 64-bit, branch32 on 32-bit.
  if (sizeof(long) == 8)
    return upb_vdecode_check2_massimino(p);
  else
-    return upb_vdecode_branch32(p);
+    return upb_vdecode_check2_branch32(p);
 }

 INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) {
@ -154,9 +114,9 @@ INLINE size_t upb_vencode64(uint64_t val, char *buf) {
  if (val == 0) { buf[0] = 0; return 1; }
  size_t i = 0;
  while (val) {
-    uint8_t byte = val & 0x7f;
+    uint8_t byte = val & 0x7fU;
    val >>= 7;
-    if (val) byte |= 0x80;
+    if (val) byte |= 0x80U;
    buf[i++] = byte;
  }
  return i;
@ -169,7 +129,7 @@ INLINE uint64_t upb_vencode32(uint32_t val) {
  uint64_t ret = 0;
  assert(bytes <= 5);
  memcpy(&ret, buf, bytes);
-  assert(ret <= 0xffffffffff);
+  assert(ret <= 0xffffffffffU);
  return ret;
 }

--- a/upb/refcount.c
+++ b/upb/refcount.c
@ -1,236 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2012 Google Inc.  See LICENSE for details.
- * Author: Josh Haberman <jhaberman@gmail.com>
- */
-
-#include <stdlib.h>
-#include "upb/refcount.h"
-
-// TODO(haberman): require client to define these if ref debugging is on.
-#ifndef UPB_LOCK
-#define UPB_LOCK
-#endif
-
-#ifndef UPB_UNLOCK
-#define UPB_UNLOCK
-#endif
-
-/* arch-specific atomic primitives  *******************************************/
-
-#ifdef UPB_THREAD_UNSAFE  //////////////////////////////////////////////////////
-
-INLINE void upb_atomic_inc(uint32_t *a) { (*a)++; }
-INLINE bool upb_atomic_dec(uint32_t *a) { return --(*a) == 0; }
-
-#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4 ///////////////////
-
-INLINE void upb_atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); }
-INLINE bool upb_atomic_dec(uint32_t *a) {
-  return __sync_sub_and_fetch(a, 1) == 0;
-}
-
-#elif defined(WIN32) ///////////////////////////////////////////////////////////
-
-#include <Windows.h>
-
-INLINE void upb_atomic_inc(upb_atomic_t *a) { InterlockedIncrement(&a->val); }
-INLINE bool upb_atomic_dec(upb_atomic_t *a) {
-  return InterlockedDecrement(&a->val) == 0;
-}
-
-#else
-#error Atomic primitives not defined for your platform/CPU.  \
-       Implement them or compile with UPB_THREAD_UNSAFE.
-#endif
-
-// Reserved index values.
-#define UPB_INDEX_UNDEFINED UINT16_MAX
-#define UPB_INDEX_NOT_IN_STACK (UINT16_MAX - 1)
-
-static void upb_refcount_merge(upb_refcount *r, upb_refcount *from) {
-  if (upb_refcount_merged(r, from)) return;
-  *r->count += *from->count;
-  free(from->count);
-  upb_refcount *base = from;
-
-  // Set all refcount pointers in the "from" chain to the merged refcount.
-  do { from->count = r->count; } while ((from = from->next) != base);
-
-  // Merge the two circularly linked lists by swapping their next pointers.
-  upb_refcount *tmp = r->next;
-  r->next = base->next;
-  base->next = tmp;
-}
-
-// Tarjan's algorithm, see:
-//   http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
-
-typedef struct {
-  int index;
-  upb_refcount **stack;
-  int stack_len;
-  upb_getsuccessors *func;
-} upb_tarjan_state;
-
-static void upb_refcount_dofindscc(upb_refcount *obj, upb_tarjan_state *state);
-
-void upb_refcount_visit(upb_refcount *obj, upb_refcount *subobj, void *_state) {
-  upb_tarjan_state *state = _state;
-  if (subobj->index == UPB_INDEX_UNDEFINED) {
-    // Subdef has not yet been visited; recurse on it.
-    upb_refcount_dofindscc(subobj, state);
-    obj->lowlink = UPB_MIN(obj->lowlink, subobj->lowlink);
-  } else if (subobj->index != UPB_INDEX_NOT_IN_STACK) {
-    // Subdef is in the stack and hence in the current SCC.
-    obj->lowlink = UPB_MIN(obj->lowlink, subobj->index);
-  }
-}
-
-static void upb_refcount_dofindscc(upb_refcount *obj, upb_tarjan_state *state) {
-  obj->index = state->index;
-  obj->lowlink = state->index;
-  state->index++;
-  state->stack[state->stack_len++] = obj;
-
-  state->func(obj, state);  // Visit successors.
-
-  if (obj->lowlink == obj->index) {
-    upb_refcount *scc_obj;
-    while ((scc_obj = state->stack[--state->stack_len]) != obj) {
-      upb_refcount_merge(obj, scc_obj);
-      scc_obj->index = UPB_INDEX_NOT_IN_STACK;
-    }
-    obj->index = UPB_INDEX_NOT_IN_STACK;
-  }
-}
-
-bool upb_refcount_findscc(upb_refcount **refs, int n, upb_getsuccessors *func) {
-  // TODO(haberman): allocate less memory.  We can't use n as a bound because
-  // it doesn't include fielddefs.  Could either use a dynamically-resizing
-  // array or think of some other way.
-  upb_tarjan_state state = {0, malloc(UINT16_MAX * sizeof(void*)), 0, func};
-  if (state.stack == NULL) return false;
-  for (int i = 0; i < n; i++)
-    if (refs[i]->index == UPB_INDEX_UNDEFINED)
-      upb_refcount_dofindscc(refs[i], &state);
-  free(state.stack);
-  return true;
-}
-
-#ifdef UPB_DEBUG_REFS
-static void upb_refcount_track(const upb_refcount *r, const void *owner) {
-  // Caller must not already own a ref.
-  assert(upb_inttable_lookup(r->refs, (uintptr_t)owner) == NULL);
-
-  // If a ref is leaked we want to blame the leak on the whoever leaked the
-  // ref, not on who originally allocated the refcounted object.  We accomplish
-  // this as follows.  When a ref is taken in DEBUG_REFS mode, we malloc() some
-  // memory and arrange setup pointers like so:
-  //
-  //   upb_refcount
-  //   +----------+  +---------+
-  //   | count    |<-+         |
-  //   +----------+       +----------+
-  //   | table    |---X-->| malloc'd |
-  //   +----------+       | memory   |
-  //                      +----------+
-  //
-  // Since the "malloc'd memory" is allocated inside of "ref" and free'd in
-  // unref, it will cause a leak if not unref'd.  And since the leaked memory
-  // points to the object itself, the object will be considered "indirectly
-  // lost" by tools like Valgrind and not shown unless requested (which is good
-  // because the object's creator may not be responsible for the leak).  But we
-  // have to hide the pointer marked "X" above from Valgrind, otherwise the
-  // malloc'd memory will appear to be indirectly leaked and the object itself
-  // will still be considered the primary leak.  We hide this pointer from
-  // Valgrind (et all) by doing a bitwise not on it.
-  const upb_refcount **target = malloc(sizeof(void*));
-  uintptr_t obfuscated = ~(uintptr_t)target;
-  *target = r;
-  upb_inttable_insert(r->refs, (uintptr_t)owner, upb_value_uint64(obfuscated));
-}
-
-static void upb_refcount_untrack(const upb_refcount *r, const void *owner) {
-  upb_value v;
-  bool success = upb_inttable_remove(r->refs, (uintptr_t)owner, &v);
-  assert(success);
-  if (success) {
-    // Must un-obfuscate the pointer (see above).
-    free((void*)(~upb_value_getuint64(v)));
-  }
-}
-#endif
-
-
-/* upb_refcount  **************************************************************/
-
-bool upb_refcount_init(upb_refcount *r, const void *owner) {
-  (void)owner;
-  r->count = malloc(sizeof(uint32_t));
-  if (!r->count) return false;
-  // Initializing this here means upb_refcount_findscc() can only run once for
-  // each refcount; may need to revise this to be more flexible.
-  r->index = UPB_INDEX_UNDEFINED;
-  r->next = r;
-#ifdef UPB_DEBUG_REFS
-  // We don't detect malloc() failures for UPB_DEBUG_REFS.
-  r->refs = malloc(sizeof(*r->refs));
-  upb_inttable_init(r->refs);
-  *r->count = 0;
-  upb_refcount_ref(r, owner);
-#else
-  *r->count = 1;
-#endif
-  return true;
-}
-
-void upb_refcount_uninit(upb_refcount *r) {
-  (void)r;
-#ifdef UPB_DEBUG_REFS
-  assert(upb_inttable_count(r->refs) == 0);
-  upb_inttable_uninit(r->refs);
-  free(r->refs);
-#endif
-}
-
-// Thread-safe operations //////////////////////////////////////////////////////
-
-void upb_refcount_ref(const upb_refcount *r, const void *owner) {
-  (void)owner;
-  upb_atomic_inc(r->count);
-#ifdef UPB_DEBUG_REFS
-  UPB_LOCK;
-  upb_refcount_track(r, owner);
-  UPB_UNLOCK;
-#endif
-}
-
-bool upb_refcount_unref(const upb_refcount *r, const void *owner) {
-  (void)owner;
-  bool ret = upb_atomic_dec(r->count);
-#ifdef UPB_DEBUG_REFS
-  UPB_LOCK;
-  upb_refcount_untrack(r, owner);
-  UPB_UNLOCK;
-#endif
-  if (ret) free(r->count);
-  return ret;
-}
-
-void upb_refcount_donateref(
-    const upb_refcount *r, const void *from, const void *to) {
-  (void)r; (void)from; (void)to;
-  assert(from != to);
-#ifdef UPB_DEBUG_REFS
-  UPB_LOCK;
-  upb_refcount_track(r, to);
-  upb_refcount_untrack(r, from);
-  UPB_UNLOCK;
-#endif
-}
-
-bool upb_refcount_merged(const upb_refcount *r, const upb_refcount *r2) {
-  return r->count == r2->count;
-}
--- a/upb/refcount.h
+++ b/upb/refcount.h
@ -1,73 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Google Inc.  See LICENSE for details.
- * Author: Josh Haberman <jhaberman@gmail.com>
- *
- * A thread-safe refcount that can optionally track references for debugging
- * purposes.  It helps avoid circular references by allowing a
- * strongly-connected component in the graph to share a refcount.
- *
- * This interface is internal to upb.
- */
-
-#ifndef UPB_REFCOUNT_H_
-#define UPB_REFCOUNT_H_
-
-#include <stdbool.h>
-#include <stdint.h>
-#include "upb/table.h"
-
-#ifndef NDEBUG
-#define UPB_DEBUG_REFS
-#endif
-
-typedef struct _upb_refcount {
-  uint32_t *count;
-  struct _upb_refcount *next;  // Circularly-linked list of this SCC.
-  uint16_t index;    // For SCC algorithm.
-  uint16_t lowlink;  // For SCC algorithm.
-#ifdef UPB_DEBUG_REFS
-  // Make this a pointer so that we can modify it inside of const methods
-  // without ugly casts.
-  upb_inttable *refs;
-#endif
-} upb_refcount;
-
-// NON THREAD SAFE operations //////////////////////////////////////////////////
-
-// Initializes the refcount with a single ref for the given owner.  Returns
-// NULL if memory could not be allocated.
-bool upb_refcount_init(upb_refcount *r, const void *owner);
-
-// Uninitializes the refcount.  May only be called after unref() returns true.
-void upb_refcount_uninit(upb_refcount *r);
-
-// Finds strongly-connected components among some set of objects and merges all
-// refcounts that share a SCC.  The given function will be called when the
-// algorithm needs to visit children of a particular object; the function
-// should call upb_refcount_visit() once for each child obj.
-//
-// Returns false if memory allocation failed.
-typedef void upb_getsuccessors(upb_refcount *obj, void*);
-bool upb_refcount_findscc(upb_refcount **objs, int n, upb_getsuccessors *func);
-void upb_refcount_visit(upb_refcount *obj, upb_refcount *subobj, void *closure);
-
-// Thread-safe operations //////////////////////////////////////////////////////
-
-// Increases the ref count, the new ref is owned by "owner" which must not
-// already own a ref.  Circular reference chains are not allowed.
-void upb_refcount_ref(const upb_refcount *r, const void *owner);
-
-// Release a ref owned by owner, returns true if that was the last ref.
-bool upb_refcount_unref(const upb_refcount *r, const void *owner);
-
-// Moves an existing ref from ref_donor to new_owner, without changing the
-// overall ref count.
-void upb_refcount_donateref(
-    const upb_refcount *r, const void *from, const void *to);
-
-// Returns true if these two objects share a refcount.
-bool upb_refcount_merged(const upb_refcount *r, const upb_refcount *r2);
-
-#endif  // UPB_REFCOUNT_H_
--- a/upb/refcounted.c
+++ b/upb/refcounted.c
@ -0,0 +1,776 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2012 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * Our key invariants are:
+ * 1. reference cycles never span groups
+ * 2. for ref2(to, from), we increment to's count iff group(from) != group(to)
+ *
+ * The previous two are how we avoid leaking cycles.  Other important
+ * invariants are:
+ * 3. for mutable objects "from" and "to", if there exists a ref2(to, from)
+ *    this implies group(from) == group(to).  (In practice, what we implement
+ *    is even stronger; "from" and "to" will share a group if there has *ever*
+ *    been a ref2(to, from), but all that is necessary for correctness is the
+ *    weaker one).
+ * 4. mutable and immutable objects are never in the same group.
+ */
+
+#include "upb/refcounted.h"
+
+#include <setjmp.h>
+#include <stdlib.h>
+
+uint32_t static_refcount = 1;
+
+/* arch-specific atomic primitives  *******************************************/
+
+#ifdef UPB_THREAD_UNSAFE  //////////////////////////////////////////////////////
+
+static void atomic_inc(uint32_t *a) { (*a)++; }
+static bool atomic_dec(uint32_t *a) { return --(*a) == 0; }
+
+#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4 ///////////////////
+
+static void atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); }
+static bool atomic_dec(uint32_t *a) { return __sync_sub_and_fetch(a, 1) == 0; }
+
+#elif defined(WIN32) ///////////////////////////////////////////////////////////
+
+#include <Windows.h>
+
+static void atomic_inc(upb_atomic_t *a) { InterlockedIncrement(&a->val); }
+static bool atomic_dec(upb_atomic_t *a) {
+  return InterlockedDecrement(&a->val) == 0;
+}
+
+#else
+#error Atomic primitives not defined for your platform/CPU.  \
+       Implement them or compile with UPB_THREAD_UNSAFE.
+#endif
+
+
+/* Reference tracking (debug only) ********************************************/
+
+#ifdef UPB_DEBUG_REFS
+
+#ifdef UPB_THREAD_UNSAFE
+
+static void upb_lock() {}
+static void upb_unlock() {}
+
+#else
+
+// User must define functions that lock/unlock a global mutex and link this
+// file against them.
+void upb_lock();
+void upb_unlock();
+
+#endif
+
+// UPB_DEBUG_REFS mode counts on being able to malloc() memory in some
+// code-paths that can normally never fail, like upb_refcounted_ref().  Since
+// we have no way to propagate out-of-memory errors back to the user, and since
+// these errors can only occur in UPB_DEBUG_REFS mode, we immediately fail.
+#define CHECK_OOM(predicate) assert(predicate)
+
+typedef struct {
+  const upb_refcounted *obj;  // Object we are taking a ref on.
+  int count;  // How many refs there are (duplicates only allowed for ref2).
+  bool is_ref2;
+} trackedref;
+
+trackedref *trackedref_new(const upb_refcounted *obj, bool is_ref2) {
+  trackedref *ret = malloc(sizeof(*ret));
+  CHECK_OOM(ret);
+  ret->obj = obj;
+  ret->count = 1;
+  ret->is_ref2 = is_ref2;
+  return ret;
+}
+
+// A reversible function for obfuscating a uintptr_t.
+// This depends on sizeof(uintptr_t) <= sizeof(uint64_t), so would fail
+// on 128-bit machines.
+static uintptr_t obfuscate(const void *x) { return ~(uintptr_t)x; }
+
+static upb_value obfuscate_v(const void *x) {
+  return upb_value_uint64(obfuscate(x));
+}
+
+static const void *unobfuscate_v(upb_value x) {
+  return (void*)~upb_value_getuint64(x);
+}
+
+//
+// Stores tracked references according to the following scheme:
+//   (upb_inttable)reftracks = {
+//     (void*)owner -> (upb_inttable*) = {
+//       obfuscate((upb_refcounted*)obj) -> obfuscate((trackedref*)ref)
+//     }
+//   }
+//
+// obfuscate() is a function that hides the link from the heap checker, so
+// that it is not followed for the purposes of deciding what has "indirectly
+// leaked."  Even though we have a pointer to the trackedref*, we want it to
+// appear leaked if it is not freed.
+//
+// This scheme gives us the following desirable properties:
+//
+//   1. We can easily determine whether an (owner->obj) ref already exists
+//      and error out if a duplicate ref is taken.
+//
+//   2. Because the trackedref is allocated with malloc() at the point that
+//      the ref is taken, that memory will be leaked if the ref is not released.
+//      Because the malloc'd memory points to the refcounted object, the object
+//      itself will only be considered "indirectly leaked" by smart memory
+//      checkers like Valgrind.  This will correctly blame the ref leaker
+//      instead of the innocent code that allocated the object to begin with.
+//
+//   3. We can easily enumerate all of the ref2 refs for a given owner, which
+//      allows us to double-check that the object's visit() function is
+//      correctly implemented.
+//
+static upb_inttable reftracks = UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR);
+
+static upb_inttable *trygettab(const void *p) {
+  const upb_value *v = upb_inttable_lookupptr(&reftracks, p);
+  return v ? upb_value_getptr(*v) : NULL;
+}
+
+// Gets or creates the tracking table for the given owner.
+static upb_inttable *gettab(const void *p) {
+  upb_inttable *tab = trygettab(p);
+  if (tab == NULL) {
+    tab = malloc(sizeof(*tab));
+    CHECK_OOM(tab);
+    upb_inttable_init(tab, UPB_CTYPE_UINT64);
+    upb_inttable_insertptr(&reftracks, p, upb_value_ptr(tab));
+  }
+  return tab;
+}
+
+static void track(const upb_refcounted *r, const void *owner, bool ref2) {
+  upb_lock();
+  upb_inttable *refs = gettab(owner);
+  const upb_value *v = upb_inttable_lookup(refs, obfuscate(r));
+  if (v) {
+    trackedref *ref = (trackedref*)unobfuscate_v(*v);
+    // Since we allow multiple ref2's for the same to/from pair without
+    // allocating separate memory for each one, we lose the fine-grained
+    // tracking behavior we get with regular refs.  Since ref2s only happen
+    // inside upb, we'll accept this limitation until/unless there is a really
+    // difficult upb-internal bug that can't be figured out without it.
+    assert(ref2);
+    assert(ref->is_ref2);
+    ref->count++;
+  } else {
+    trackedref *ref = trackedref_new(r, ref2);
+    bool ok = upb_inttable_insert(refs, obfuscate(r), obfuscate_v(ref));
+    CHECK_OOM(ok);
+  }
+  upb_unlock();
+}
+
+static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
+  upb_lock();
+  upb_inttable *refs = gettab(owner);
+  const upb_value *v = upb_inttable_lookup(refs, obfuscate(r));
+  // This assert will fail if an owner attempts to release a ref it didn't have.
+  assert(v);
+  trackedref *ref = (trackedref*)unobfuscate_v(*v);
+  assert(ref->is_ref2 == ref2);
+  if (--ref->count == 0) {
+    free(ref);
+    upb_inttable_remove(refs, obfuscate(r), NULL);
+    if (upb_inttable_count(refs) == 0) {
+      upb_inttable_uninit(refs);
+      free(refs);
+      upb_inttable_removeptr(&reftracks, owner, NULL);
+    }
+  }
+  upb_unlock();
+}
+
+static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
+  upb_lock();
+  upb_inttable *refs = gettab(owner);
+  const upb_value *v = upb_inttable_lookup(refs, obfuscate(r));
+  assert(v);
+  trackedref *ref = (trackedref*)unobfuscate_v(*v);
+  assert(ref->obj == r);
+  assert(ref->is_ref2 == ref2);
+  upb_unlock();
+}
+
+// Populates the given UPB_CTYPE_INT32 inttable with counts of ref2's that
+// originate from the given owner.
+static void getref2s(const upb_refcounted *owner, upb_inttable *tab) {
+  upb_lock();
+  upb_inttable *refs = trygettab(owner);
+  if (refs) {
+    upb_inttable_iter i;
+    upb_inttable_begin(&i, refs);
+    for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+      trackedref *ref = (trackedref*)unobfuscate_v(upb_inttable_iter_value(&i));
+      if (ref->is_ref2) {
+        upb_value count = upb_value_int32(ref->count);
+        bool ok = upb_inttable_insertptr(tab, ref->obj, count);
+        CHECK_OOM(ok);
+      }
+    }
+  }
+  upb_unlock();
+}
+
+typedef struct {
+  upb_inttable ref2;
+  const upb_refcounted *obj;
+} check_state;
+
+static void visit_check(const upb_refcounted *obj, const upb_refcounted *subobj,
+                        void *closure) {
+  check_state *s = closure;
+  assert(obj == s->obj);
+  assert(subobj);
+  upb_inttable *ref2 = &s->ref2;
+  upb_value v;
+  bool removed = upb_inttable_removeptr(ref2, subobj, &v);
+  // The following assertion will fail if the visit() function visits a subobj
+  // that it did not have a ref2 on, or visits the same subobj too many times.
+  assert(removed);
+  int32_t newcount = upb_value_getint32(v) - 1;
+  if (newcount > 0) {
+    upb_inttable_insert(ref2, (uintptr_t)subobj, upb_value_int32(newcount));
+  }
+}
+
+static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
+                  void *closure) {
+  // In DEBUG_REFS mode we know what existing ref2 refs there are, so we know
+  // exactly the set of nodes that visit() should visit.  So we verify visit()'s
+  // correctness here.
+  check_state state;
+  state.obj = r;
+  bool ok = upb_inttable_init(&state.ref2, UPB_CTYPE_INT32);
+  CHECK_OOM(ok);
+  getref2s(r, &state.ref2);
+
+  // This should visit any children in the ref2 table.
+  if (r->vtbl->visit) r->vtbl->visit(r, visit_check, &state);
+
+  // This assertion will fail if the visit() function missed any children.
+  assert(upb_inttable_count(&state.ref2) == 0);
+  upb_inttable_uninit(&state.ref2);
+  if (r->vtbl->visit) r->vtbl->visit(r, v, closure);
+}
+
+#else
+
+static void track(const upb_refcounted *r, const void *owner, bool ref2) {
+  UPB_UNUSED(r);
+  UPB_UNUSED(owner);
+  UPB_UNUSED(ref2);
+}
+
+static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
+  UPB_UNUSED(r);
+  UPB_UNUSED(owner);
+  UPB_UNUSED(ref2);
+}
+
+static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
+  UPB_UNUSED(r);
+  UPB_UNUSED(owner);
+  UPB_UNUSED(ref2);
+}
+
+static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
+                  void *closure) {
+  if (r->vtbl->visit) r->vtbl->visit(r, v, closure);
+}
+
+#endif  // UPB_DEBUG_REFS
+
+
+/* freeze() *******************************************************************/
+
+// The freeze() operation is by far the most complicated part of this scheme.
+// We compute strongly-connected components and then mutate the graph such that
+// we preserve the invariants documented at the top of this file.  And we must
+// handle out-of-memory errors gracefully (without leaving the graph
+// inconsistent), which adds to the fun.
+
+// The state used by the freeze operation (shared across many functions).
+typedef struct {
+  int depth;
+  int maxdepth;
+  uint64_t index;
+  // Maps upb_refcounted* -> attributes (color, etc).  attr layout varies by
+  // color.
+  upb_inttable objattr;
+  upb_inttable stack;   // stack of upb_refcounted* for Tarjan's algorithm.
+  upb_inttable groups;  // array of uint32_t*, malloc'd refcounts for new groups
+  upb_status *status;
+  jmp_buf err;
+} tarjan;
+
+static void release_ref2(const upb_refcounted *obj,
+                         const upb_refcounted *subobj,
+                         void *closure);
+
+// Node attributes /////////////////////////////////////////////////////////////
+
+// After our analysis phase all nodes will be either GRAY or WHITE.
+
+typedef enum {
+  BLACK = 0,  // Object has not been seen.
+  GRAY,   // Object has been found via a refgroup but may not be reachable.
+  GREEN,  // Object is reachable and is currently on the Tarjan stack.
+  WHITE,  // Object is reachable and has been assigned a group (SCC).
+} color_t;
+
+UPB_NORETURN static void err(tarjan *t) { longjmp(t->err, 1); }
+UPB_NORETURN static void oom(tarjan *t) {
+  upb_status_seterrliteral(t->status, "out of memory");
+  err(t);
+}
+
+uint64_t trygetattr(const tarjan *t, const upb_refcounted *r) {
+  const upb_value *v = upb_inttable_lookupptr(&t->objattr, r);
+  return v ? upb_value_getuint64(*v) : 0;
+}
+
+uint64_t getattr(const tarjan *t, const upb_refcounted *r) {
+  const upb_value *v = upb_inttable_lookupptr(&t->objattr, r);
+  assert(v);
+  return upb_value_getuint64(*v);
+}
+
+void setattr(tarjan *t, const upb_refcounted *r, uint64_t attr) {
+  upb_inttable_removeptr(&t->objattr, r, NULL);
+  upb_inttable_insertptr(&t->objattr, r, upb_value_uint64(attr));
+}
+
+static color_t color(tarjan *t, const upb_refcounted *r) {
+  return trygetattr(t, r) & 0x3;  // Color is always stored in the low 2 bits.
+}
+
+static void set_gray(tarjan *t, const upb_refcounted *r) {
+  assert(color(t, r) == BLACK);
+  setattr(t, r, GRAY);
+}
+
+// Pushes an obj onto the Tarjan stack and sets it to GREEN.
+static void push(tarjan *t, const upb_refcounted *r) {
+  assert(color(t, r) == BLACK || color(t, r) == GRAY);
+  // This defines the attr layout for the GREEN state.  "index" and "lowlink"
+  // get 31 bits, which is plenty (limit of 2B objects frozen at a time).
+  setattr(t, r, GREEN | (t->index << 2) | (t->index << 33));
+  if (++t->index == 0x80000000) {
+    upb_status_seterrliteral(t->status, "too many objects to freeze");
+    err(t);
+  }
+  upb_inttable_push(&t->stack, upb_value_ptr((void*)r));
+}
+
+// Pops an obj from the Tarjan stack and sets it to WHITE, with a ptr to its
+// SCC group.
+static upb_refcounted *pop(tarjan *t) {
+  upb_refcounted *r = upb_value_getptr(upb_inttable_pop(&t->stack));
+  assert(color(t, r) == GREEN);
+  // This defines the attr layout for nodes in the WHITE state.
+  // Top of group stack is [group, NULL]; we point at group.
+  setattr(t, r, WHITE | (upb_inttable_count(&t->groups) - 2) << 8);
+  return r;
+}
+
+static void newgroup(tarjan *t) {
+  uint32_t *group = malloc(sizeof(*group));
+  if (!group) oom(t);
+  // Push group and empty group leader (we'll fill in leader later).
+  if (!upb_inttable_push(&t->groups, upb_value_ptr(group)) ||
+      !upb_inttable_push(&t->groups, upb_value_ptr(NULL))) {
+    free(group);
+    oom(t);
+  }
+  *group = 0;
+}
+
+static uint32_t idx(tarjan *t, const upb_refcounted *r) {
+  assert(color(t, r) == GREEN);
+  return (getattr(t, r) >> 2) & 0x7FFFFFFF;
+}
+
+static uint32_t lowlink(tarjan *t, const upb_refcounted *r) {
+  if (color(t, r) == GREEN) {
+    return getattr(t, r) >> 33;
+  } else {
+    return UINT32_MAX;
+  }
+}
+
+static void set_lowlink(tarjan *t, const upb_refcounted *r, uint32_t lowlink) {
+  assert(color(t, r) == GREEN);
+  setattr(t, r, ((uint64_t)lowlink << 33) | (getattr(t, r) & 0x1FFFFFFFF));
+}
+
+uint32_t *group(tarjan *t, upb_refcounted *r) {
+  assert(color(t, r) == WHITE);
+  uint64_t groupnum = getattr(t, r) >> 8;
+  const upb_value *v = upb_inttable_lookup(&t->groups, groupnum);
+  assert(v);
+  return upb_value_getptr(*v);
+}
+
+// If the group leader for this object's group has not previously been set,
+// the given object is assigned to be its leader.
+static upb_refcounted *groupleader(tarjan *t, upb_refcounted *r) {
+  assert(color(t, r) == WHITE);
+  uint64_t leader_slot = (getattr(t, r) >> 8) + 1;
+  const upb_value *v = upb_inttable_lookup(&t->groups, leader_slot);
+  assert(v);
+  if (upb_value_getptr(*v)) {
+    return upb_value_getptr(*v);
+  } else {
+    upb_inttable_remove(&t->groups, leader_slot, NULL);
+    upb_inttable_insert(&t->groups, leader_slot, upb_value_ptr(r));
+    return r;
+  }
+}
+
+
+// Tarjan's algorithm //////////////////////////////////////////////////////////
+
+// See:
+//   http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
+static void do_tarjan(const upb_refcounted *obj, tarjan *t);
+
+static void tarjan_visit(const upb_refcounted *obj,
+                         const upb_refcounted *subobj,
+                         void *closure) {
+  tarjan *t = closure;
+  if (++t->depth > t->maxdepth) {
+    upb_status_seterrf(t->status, "graph too deep to freeze (%d)", t->maxdepth);
+    err(t);
+  } else if (subobj->is_frozen || color(t, subobj) == WHITE) {
+    // Do nothing: we don't want to visit or color already-frozen nodes,
+    // and WHITE nodes have already been assigned a SCC.
+  } else if (color(t, subobj) < GREEN) {
+    // Subobj has not yet been visited; recurse on it.
+    do_tarjan(subobj, t);
+    set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), lowlink(t, subobj)));
+  } else if (color(t, subobj) == GREEN) {
+    // Subobj is in the stack and hence in the current SCC.
+    set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), idx(t, subobj)));
+  }
+  --t->depth;
+}
+
+static void do_tarjan(const upb_refcounted *obj, tarjan *t) {
+  if (color(t, obj) == BLACK) {
+    // We haven't seen this object's group; mark the whole group GRAY.
+    const upb_refcounted *o = obj;
+    do { set_gray(t, o); } while ((o = o->next) != obj);
+  }
+
+  push(t, obj);
+  visit(obj, tarjan_visit, t);
+  if (lowlink(t, obj) == idx(t, obj)) {
+    newgroup(t);
+    while (pop(t) != obj)
+      ;
+  }
+}
+
+
+// freeze() ////////////////////////////////////////////////////////////////////
+
+static void crossref(const upb_refcounted *r, const upb_refcounted *subobj,
+                     void *_t) {
+  tarjan *t = _t;
+  assert(color(t, r) > BLACK);
+  if (color(t, subobj) > BLACK && r->group != subobj->group) {
+    // Previously this ref was not reflected in subobj->group because they
+    // were in the same group; now that they are split a ref must be taken.
+    atomic_inc(subobj->group);
+  }
+}
+
+static bool freeze(upb_refcounted *const*roots, int n, upb_status *s) {
+  volatile bool ret = false;
+
+  // We run in two passes so that we can allocate all memory before performing
+  // any mutation of the input -- this allows us to leave the input unchanged
+  // in the case of memory allocation failure.
+  tarjan t;
+  t.index = 0;
+  t.depth = 0;
+  t.maxdepth = UPB_MAX_TYPE_DEPTH * 2;  // May want to make this a parameter.
+  t.status = s;
+  if (!upb_inttable_init(&t.objattr, UPB_CTYPE_UINT64)) goto err1;
+  if (!upb_inttable_init(&t.stack, UPB_CTYPE_PTR)) goto err2;
+  if (!upb_inttable_init(&t.groups, UPB_CTYPE_PTR)) goto err3;
+  if (setjmp(t.err) != 0) goto err4;
+
+
+  for (int i = 0; i < n; i++) {
+    if (color(&t, roots[i]) < GREEN) {
+      do_tarjan(roots[i], &t);
+    }
+  }
+
+  // If we've made it this far, no further errors are possible so it's safe to
+  // mutate the objects without risk of leaving them in an inconsistent state.
+  ret = true;
+
+  // The transformation that follows requires care.  The preconditions are:
+  // - all objects in attr map are WHITE or GRAY, and are in mutable groups
+  //   (groups of all mutable objs)
+  // - no ref2(to, from) refs have incremented count(to) if both "to" and
+  //   "from" are in our attr map (this follows from invariants (2) and (3))
+
+  // Pass 1: we remove WHITE objects from their mutable groups, and add them to
+  // new groups according to the SCCs we computed.  These new groups will
+  // consist of only frozen objects.  None will be immediately collectible,
+  // because WHITE objects are by definition reachable from one of "roots",
+  // which the caller must own refs on.
+  upb_inttable_iter i;
+  upb_inttable_begin(&i, &t.objattr);
+  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&i);
+    // Since removal from a singly-linked list requires access to the object's
+    // predecessor, we consider obj->next instead of obj for moving.  With the
+    // while() loop we guarantee that we will visit every node's predecessor.
+    // Proof:
+    //  1. every node's predecessor is in our attr map.
+    //  2. though the loop body may change a node's predecessor, it will only
+    //     change it to be the node we are currently operating on, so with a
+    //     while() loop we guarantee ourselves the chance to remove each node.
+    while (color(&t, obj->next) == WHITE &&
+           group(&t, obj->next) != obj->next->group) {
+      // Remove from old group.
+      upb_refcounted *move = obj->next;
+      if (obj == move) {
+        // Removing the last object from a group.
+        assert(*obj->group == obj->individual_count);
+        free(obj->group);
+      } else {
+        obj->next = move->next;
+        // This may decrease to zero; we'll collect GRAY objects (if any) that
+        // remain in the group in the third pass.
+        assert(*move->group >= move->individual_count);
+        *move->group -= move->individual_count;
+      }
+
+      // Add to new group.
+      upb_refcounted *leader = groupleader(&t, move);
+      if (move == leader) {
+        // First object added to new group is its leader.
+        move->group = group(&t, move);
+        move->next = move;
+        *move->group = move->individual_count;
+      } else {
+        // Group already has at least one object in it.
+        assert(leader->group == group(&t, move));
+        move->group = group(&t, move);
+        move->next = leader->next;
+        leader->next = move;
+        *move->group += move->individual_count;
+      }
+
+      move->is_frozen = true;
+    }
+  }
+
+  // Pass 2: GRAY and WHITE objects "obj" with ref2(to, obj) references must
+  // increment count(to) if group(obj) != group(to) (which could now be the
+  // case if "to" was just frozen).
+  upb_inttable_begin(&i, &t.objattr);
+  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&i);
+    visit(obj, crossref, &t);
+  }
+
+  // Pass 3: GRAY objects are collected if their group's refcount dropped to
+  // zero when we removed its white nodes.  This can happen if they had only
+  // been kept alive by virtue of sharing a group with an object that was just
+  // frozen.
+  //
+  // It is important that we do this last, since the GRAY object's free()
+  // function could call unref2() on just-frozen objects, which will decrement
+  // refs that were added in pass 2.
+  upb_inttable_begin(&i, &t.objattr);
+  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&i);
+    if (obj->group == NULL || *obj->group == 0) {
+      if (obj->group) {
+        // We eagerly free() the group's count (since we can't easily determine
+        // the group's remaining size it's the easiest way to ensure it gets
+        // done).
+        free(obj->group);
+
+        // Visit to release ref2's (done in a separate pass since release_ref2
+        // depends on o->group being unmodified so it can test merged()).
+        upb_refcounted *o = obj;
+        do { visit(o, release_ref2, NULL); } while ((o = o->next) != obj);
+
+        // Mark "group" fields as NULL so we know to free the objects later in
+        // this loop, but also don't try to delete the group twice.
+        o = obj;
+        do { o->group = NULL; } while ((o = o->next) != obj);
+      }
+      obj->vtbl->free(obj);
+    }
+  }
+
+err4:
+  if (!ret) {
+    upb_inttable_begin(&i, &t.groups);
+    for(; !upb_inttable_done(&i); upb_inttable_next(&i))
+      free(upb_value_getptr(upb_inttable_iter_value(&i)));
+  }
+  upb_inttable_uninit(&t.groups);
+err3:
+  upb_inttable_uninit(&t.stack);
+err2:
+  upb_inttable_uninit(&t.objattr);
+err1:
+  return ret;
+}
+
+
+/* Misc internal functions  ***************************************************/
+
+// Reports whether the two objects share a single group refcount cell, i.e.
+// whether they currently belong to the same refcounting group.
+static bool merged(const upb_refcounted *r, const upb_refcounted *r2) {
+  const bool same_group = (r->group == r2->group);
+  return same_group;
+}
+
+// Merges the refcounting group containing "from" into the group containing
+// "r": the two group counts are summed into r's count, every object on from's
+// list is repointed at r's count, and the two circular lists are joined.
+// Does nothing if the objects already share a group.
+static void merge(upb_refcounted *r, upb_refcounted *from) {
+  if (merged(r, from)) return;
+  // r's count absorbs from's count; from's count cell is no longer needed.
+  *r->group += *from->group;
+  free(from->group);
+  upb_refcounted *base = from;
+
+  // Set all refcount pointers in the "from" chain to the merged refcount.
+  //
+  // TODO(haberman): this linear algorithm can result in an overall O(n^2) bound
+  // if the user continuously extends a group by one object.  Prevent this by
+  // using one of the techniques in this paper:
+  //     ftp://www.ncedc.org/outgoing/geomorph/dino/orals/p245-tarjan.pdf
+  do { from->group = r->group; } while ((from = from->next) != base);
+
+  // Merge the two circularly linked lists by swapping their next pointers.
+  upb_refcounted *tmp = r->next;
+  r->next = base->next;
+  base->next = tmp;
+}
+
+static void unref(const upb_refcounted *r);
+
+// visit() callback that releases the ref2 "obj" holds on "subobj".  Refs
+// between objects of one group are not counted, so the group count is only
+// decremented when subobj lives in a different group (in which case subobj
+// must be frozen).  "closure" is unused.
+static void release_ref2(const upb_refcounted *obj,
+                         const upb_refcounted *subobj,
+                         void *closure) {
+  UPB_UNUSED(closure);
+  // Drop the tracking record before unref(): unref() can free every object in
+  // subobj's group, and subobj must not be handed to untrack() after that.
+  untrack(subobj, obj, true);
+  if (!merged(obj, subobj)) {
+    assert(subobj->is_frozen);
+    unref(subobj);
+  }
+}
+
+// Drops one count from r's group.  When atomic_dec() reports that the group
+// is dead (presumably: its count reached zero -- confirm against atomic_dec),
+// the count cell is freed, the ref2's held by every object in the group are
+// released, and finally each object is destroyed through its vtbl.
+static void unref(const upb_refcounted *r) {
+  if (atomic_dec(r->group)) {
+    free(r->group);
+
+    // In two passes, since release_ref2 needs a guarantee that any subobjs
+    // are alive.
+    const upb_refcounted *o = r;
+    do { visit(o, release_ref2, NULL); } while((o = o->next) != r);
+
+    o = r;
+    do {
+      // Read the successor first: free() below destroys "o".
+      const upb_refcounted *next = o->next;
+      assert(o->is_frozen || o->individual_count == 0);
+      o->vtbl->free((upb_refcounted*)o);
+      o = next;
+    } while(o != r);
+  }
+}
+
+
+/* Public interface ***********************************************************/
+
+// Sets up "r" as the only member of a brand-new mutable group and takes one
+// ref on it on behalf of "owner".  Returns false, without taking a ref, if
+// the group's count cell cannot be allocated.
+bool upb_refcounted_init(upb_refcounted *r,
+                         const struct upb_refcounted_vtbl *vtbl,
+                         const void *owner) {
+  uint32_t *count;
+
+  // A fresh object forms a one-element circular list.
+  r->next = r;
+  r->vtbl = vtbl;
+  r->individual_count = 0;
+  r->is_frozen = false;
+
+  count = malloc(sizeof(*count));
+  r->group = count;
+  if (count == NULL) return false;
+
+  // The count starts at zero; the ref below raises it to one.
+  *count = 0;
+  upb_refcounted_ref(r, owner);
+  return true;
+}
+
+// Returns the object's is_frozen flag.
+bool upb_refcounted_isfrozen(const upb_refcounted *r) {
+  const bool frozen = r->is_frozen;
+  return frozen;
+}
+
+// Takes a ref on "r" for "owner": bumps the group count and, while the object
+// is still mutable, its per-object count as well, then records the ref with
+// track().
+void upb_refcounted_ref(const upb_refcounted *r, const void *owner) {
+  if (!r->is_frozen) {
+    // individual_count is only maintained for mutable objects.
+    upb_refcounted *mutable_r = (upb_refcounted*)r;
+    mutable_r->individual_count += 1;
+  }
+  atomic_inc(r->group);
+  track(r, owner, false);
+}
+
+// Releases the ref that "owner" holds on "r", collecting r's group if this
+// was the last ref to it.
+void upb_refcounted_unref(const upb_refcounted *r, const void *owner) {
+  // Drop the tracking record first: unref() below may free "r", after which
+  // it must not be passed to untrack().
+  untrack(r, owner, false);
+  // individual_count is only maintained for mutable objects.
+  if (!r->is_frozen)
+    ((upb_refcounted*)r)->individual_count--;
+  unref(r);
+}
+
+// Records a ref from the mutable object "from" to "r".  A mutable target is
+// merged into from's group (refs inside a group are not counted); a frozen
+// target has its group count incremented instead.
+void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from) {
+  assert(!from->is_frozen);  // Non-const pointer implies this.
+  if (!r->is_frozen) {
+    merge((upb_refcounted*)r, from);
+  } else {
+    atomic_inc(r->group);
+  }
+  track(r, from, true);
+}
+
+// Removes a ref from the mutable object "from" to "r".  Only a frozen target
+// carries a counted ref that has to be dropped; a mutable target simply stays
+// merged with from's group.
+void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from) {
+  assert(!from->is_frozen);  // Non-const pointer implies this.
+  // Drop the tracking record first: unref() below may free "r", after which
+  // it must not be passed to untrack().
+  untrack(r, from, true);
+  if (r->is_frozen) {
+    unref(r);
+  } else {
+    assert(merged(r, from));
+  }
+}
+
+// Hands the ref on "r" owned by "from" over to "to".  The new ref is taken
+// before the old one is released, so the object stays alive throughout.  A
+// NULL "from" means there is no existing ref to give up; "to" must not be
+// NULL.
+void upb_refcounted_donateref(
+    const upb_refcounted *r, const void *from, const void *to) {
+  assert(from != to);
+  assert(to != NULL);
+  upb_refcounted_ref(r, to);
+  if (from == NULL) return;
+  upb_refcounted_unref(r, from);
+}
+
+// Verifies that "owner" holds a (non-ref2) ref on "r" by delegating to the
+// internal checkref() helper.
+void upb_refcounted_checkref(const upb_refcounted *r, const void *owner) {
+  checkref(r, owner, false);
+}
+
+// Public entry point for freezing: asserts that none of the "n" roots is
+// already frozen, then hands the work to freeze() and returns its result.
+bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s) {
+  int idx = 0;
+  while (idx < n) {
+    assert(!roots[idx]->is_frozen);
+    idx++;
+  }
+  return freeze(roots, n, s);
+}
--- a/upb/refcounted.h
+++ b/upb/refcounted.h
@ -0,0 +1,180 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009-2012 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * A refcounting scheme that supports circular refs.  It accomplishes this by
+ * partitioning the set of objects into groups such that no cycle spans groups;
+ * we can then reference-count the group as a whole and ignore refs within the
+ * group.  When objects are mutable, these groups are computed very
+ * conservatively; we group any objects that have ever had a link between them.
+ * When objects are frozen, we compute strongly-connected components which
+ * allows us to be precise and only group objects that are actually cyclic.
+ *
+ * This is a mixed C/C++ interface that offers a full API to both languages.
+ * See the top-level README for more information.
+ */
+
+#ifndef UPB_REFCOUNTED_H_
+#define UPB_REFCOUNTED_H_
+
+#include "upb/table.h"
+
+// Reference tracking is designed to be used with a tool like Valgrind; when
+// enabled, it will cause reference leaks to show up as actual memory leaks
+// that are attributed to the code that leaked the ref, *not* the code that
+// originally created the object.
+#ifndef NDEBUG
+#define UPB_DEBUG_REFS
+#endif
+
+struct upb_refcounted_vtbl;
+
+#ifdef __cplusplus
+namespace upb { class RefCounted; }
+typedef upb::RefCounted upb_refcounted;
+extern "C" {
+#else
+struct upb_refcounted;
+typedef struct upb_refcounted upb_refcounted;
+#endif
+
+#ifdef __cplusplus
+
+// C++ view of the refcounted base object.  The data members below are shared
+// verbatim with the C "struct upb_refcounted", so both languages see the same
+// layout.
+class upb::RefCounted {
+ public:
+  // Returns true if the given object is frozen.
+  bool IsFrozen() const;
+
+  // Increases the ref count; the new ref is owned by "owner", which must not
+  // already own a ref (and should not itself be a refcounted object if the ref
+  // could possibly be circular; see below).
+  // Thread-safe iff "this" is frozen.
+  void Ref(const void *owner) const;
+
+  // Releases a ref that was acquired from upb_refcounted_ref() and collects
+  // any objects it can.
+  void Unref(const void *owner) const;
+
+  // Moves an existing ref from "from" to "to", without changing the overall
+  // ref count.  DonateRef(foo, NULL, owner) is the same as Ref(foo, owner),
+  // but "to" may not be NULL.
+  void DonateRef(const void *from, const void *to) const;
+
+  // Verifies that a ref to the given object is currently held by the given
+  // owner.  Only effective in UPB_DEBUG_REFS builds.
+  void CheckRef(const void *owner) const;
+
+ private:
+  UPB_DISALLOW_POD_OPS(RefCounted);
+#else
+struct upb_refcounted {
+#endif
+  // A single reference count shared by all objects in the group.
+  uint32_t *group;
+
+  // A singly-linked list of all objects in the group.  For objects set up by
+  // upb_refcounted_init() the list is circular (a lone object points at
+  // itself).
+  upb_refcounted *next;
+
+  // Table of function pointers for this type.
+  const struct upb_refcounted_vtbl *vtbl;
+
+  // Maintained only when mutable, this tracks the number of refs (but not
+  // ref2's) to this object.  *group should be the sum of all individual_count
+  // in the group.
+  uint32_t individual_count;
+
+  // True once the object has been frozen; reported by
+  // upb_refcounted_isfrozen().
+  bool is_frozen;
+};
+
+// Native C API.
+bool upb_refcounted_isfrozen(const upb_refcounted *r);
+void upb_refcounted_ref(const upb_refcounted *r, const void *owner);
+void upb_refcounted_unref(const upb_refcounted *r, const void *owner);
+void upb_refcounted_donateref(
+    const upb_refcounted *r, const void *from, const void *to);
+void upb_refcounted_checkref(const upb_refcounted *r, const void *owner);
+
+
+// Internal-to-upb Interface ///////////////////////////////////////////////////
+
+// Callback type handed to a vtbl "visit" function: it is invoked with the
+// visiting object "r", one of its subobjects "subobj", and the caller's
+// closure.
+typedef void upb_refcounted_visit(const upb_refcounted *r,
+                                  const upb_refcounted *subobj,
+                                  void *closure);
+
+// Per-type operations that every refcounted type supplies to
+// upb_refcounted_init().
+struct upb_refcounted_vtbl {
+  // Must visit all subobjects that are currently ref'd via upb_refcounted_ref2.
+  // Must be longjmp()-safe.
+  void (*visit)(const upb_refcounted *r, upb_refcounted_visit *visit, void *c);
+
+  // Must free the object and release all references to other objects.
+  void (*free)(upb_refcounted *r);
+};
+
+// Initializes the refcounted with a single ref for the given owner.  Returns
+// false if memory could not be allocated.
+bool upb_refcounted_init(upb_refcounted *r,
+                         const struct upb_refcounted_vtbl *vtbl,
+                         const void *owner);
+
+// Adds a ref from one refcounted object to another ("from" must not already
+// own a ref).  These refs may be circular; cycles will be collected correctly
+// (if conservatively).  These refs do not need to be freed in from's free()
+// function.
+void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from);
+
+// Removes a ref that was acquired from upb_refcounted_ref2(), and collects any
+// object it can.  This is only necessary when "from" no longer points to "r",
+// and not from from's "free" function.
+void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from);
+
+#define upb_ref2(r, from) \
+    upb_refcounted_ref2((const upb_refcounted*)r, (upb_refcounted*)from)
+#define upb_unref2(r, from) \
+    upb_refcounted_unref2((const upb_refcounted*)r, (upb_refcounted*)from)
+
+// Freezes all mutable objects reachable by ref2() refs from the given roots.
+// This will split refcounting groups into precise SCC groups, so that
+// refcounting of frozen objects can be more aggressive.  If memory allocation
+// fails or if more than 2**31 mutable objects are reachable from "roots",
+// false is returned and the objects are unchanged.
+//
+// After this operation succeeds, the objects are frozen/const, and may not be
+// used through non-const pointers.  In particular, they may not be passed as
+// the second parameter of upb_refcounted_{ref,unref}2().  On the upside, all
+// operations on frozen refcounteds are threadsafe, and objects will be freed
+// at the precise moment that they become unreachable.
+//
+// Caller must own refs on each object in the "roots" list.
+bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s);
+
+// Shared by all compiled-in refcounted objects.
+extern uint32_t static_refcount;
+
+// Static initializer for a compiled-in refcounted object.  In struct field
+// order: group = &static_refcount, next = NULL, vtbl = NULL,
+// individual_count = 0, is_frozen = true.
+#define UPB_REFCOUNT_INIT {&static_refcount, NULL, NULL, 0, true}
+
+#ifdef __cplusplus
+}  /* extern "C" */
+
+// C++ Wrappers.
+// Each inline method forwards to the C function of the corresponding name,
+// passing "this" as the refcounted object.
+namespace upb {
+inline bool RefCounted::IsFrozen() const {
+  return upb_refcounted_isfrozen(this);
+}
+inline void RefCounted::Ref(const void *owner) const {
+  upb_refcounted_ref(this, owner);
+}
+inline void RefCounted::Unref(const void *owner) const {
+  upb_refcounted_unref(this, owner);
+}
+inline void RefCounted::DonateRef(const void *from, const void *to) const {
+  upb_refcounted_donateref(this, from, to);
+}
+inline void RefCounted::CheckRef(const void *owner) const {
+  upb_refcounted_checkref(this, owner);
+}
+}  // namespace upb
+#endif
+
+#endif  // UPB_REFCOUNTED_H_
--- a/upb/sink.c
+++ b/upb/sink.c
@ -0,0 +1,205 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include "upb/sink.h"
+
+// Returns true if one more frame can be pushed onto the sink's stack.
+// Otherwise records a "Nesting too deep." error in the sink's status and
+// returns false.
+static bool chkstack(upb_sink *s) {
+  const bool has_room = (s->top + 1 < s->limit);
+  if (has_room) return true;
+
+  upb_status_seterrliteral(&s->status, "Nesting too deep.");
+  return false;
+}
+
+// Returns the selector for handler "type" of field "f".  The lookup result
+// is passed to UPB_ASSERT_VAR rather than being reported to the caller.
+static upb_selector_t getselector(const upb_fielddef *f,
+                                  upb_handlertype_t type) {
+  upb_selector_t result;
+  bool found = upb_getselector(f, type, &result);
+  UPB_ASSERT_VAR(found, found);
+  return result;
+}
+
+// Prepares "s" to deliver data to the top-level handlers "h".  The sink has
+// no current frame until upb_sink_reset() is called.
+void upb_sink_init(upb_sink *s, const upb_handlers *h) {
+  upb_status_init(&s->status);
+  s->stack[0].h = h;
+  // "limit" is one past the last usable frame.
+  s->limit = s->stack + UPB_MAX_NESTING;
+  s->top = NULL;
+}
+
+// Rewinds the sink to its bottom frame and installs "closure" as the
+// top-level closure.
+void upb_sink_reset(upb_sink *s, void *closure) {
+  upb_sink_frame *bottom = s->stack;
+  bottom->closure = closure;
+  s->top = bottom;
+}
+
+// Releases the sink's status object; the handlers passed to upb_sink_init()
+// are not touched here.
+void upb_sink_uninit(upb_sink *s) {
+  upb_status_uninit(&s->status);
+}
+
+// Invokes the startmsg handler of the current frame, if one is registered,
+// and returns its result.  Returns true when there is no such handler.
+bool upb_sink_startmsg(upb_sink *s) {
+  upb_sink_frame *frame = s->top;
+  upb_startmsg_handler *on_start = upb_handlers_getstartmsg(frame->h);
+  if (!on_start) return true;
+  return on_start(frame->closure);
+}
+
+// Ends the top-level message by invoking its endmsg handler, if any, with the
+// sink's own status object.  The "status" argument is currently unused.  Must
+// be called with the stack back at its bottom frame.
+void upb_sink_endmsg(upb_sink *s, upb_status *status) {
+  upb_sink_frame *frame = s->top;
+  UPB_UNUSED(status);
+  assert(frame == s->stack);
+
+  upb_endmsg_handler *on_end = upb_handlers_getendmsg(frame->h);
+  if (on_end == NULL) return;
+  on_end(frame->closure, &s->status);
+}
+
+// Generates upb_sink_put<type>(), which delivers one scalar value of C type
+// "ctype" for field "f" to the current frame's handler of kind
+// UPB_HANDLER_<htype>.  The generated function returns false if "f" has no
+// such selector or if the handler returns false, and true otherwise
+// (including when no handler is registered).
+#define PUTVAL(type, ctype, htype) \
+  bool upb_sink_put ## type(upb_sink *s, const upb_fielddef *f, ctype val) { \
+    upb_selector_t selector; \
+    if (!upb_getselector(f, UPB_HANDLER_ ## htype, &selector)) return false; \
+    upb_ ## type ## _handler *handler = (upb_ ## type ## _handler*) \
+        upb_handlers_gethandler(s->top->h, selector); \
+    if (handler) { \
+      void *data = upb_handlers_gethandlerdata(s->top->h, selector); \
+      if (!handler(s->top->closure, data, val)) return false; \
+    } \
+    return true; \
+  }
+
+PUTVAL(int32,  int32_t,         INT32);
+PUTVAL(int64,  int64_t,         INT64);
+PUTVAL(uint32, uint32_t,        UINT32);
+PUTVAL(uint64, uint64_t,        UINT64);
+PUTVAL(float,  float,           FLOAT);
+PUTVAL(double, double,          DOUBLE);
+PUTVAL(bool,   bool,            BOOL);
+#undef PUTVAL
+
+// Delivers "n" bytes of string data at "buf" for field "f" to the STRING
+// handler of the current frame.  Returns the handler's result, "n" if no
+// handler is registered, or 0 if "f" has no STRING selector.
+size_t upb_sink_putstring(upb_sink *s, const upb_fielddef *f,
+                          const char *buf, size_t n) {
+  upb_selector_t selector;
+  // Spelled 0 rather than "false": this function returns a byte count.
+  if (!upb_getselector(f, UPB_HANDLER_STRING, &selector)) return 0;
+  upb_string_handler *handler = (upb_string_handler*)
+      upb_handlers_gethandler(s->top->h, selector);
+  if (handler) {
+    void *data = upb_handlers_gethandlerdata(s->top->h, selector);
+    return handler(s->top->closure, data, buf, n);
+  }
+  return n;
+}
+
+// Begins a sequence for field "f": runs the STARTSEQ handler (if any) to
+// obtain the closure for the sequence, then pushes a frame that keeps the
+// current handlers.  Returns false, pushing nothing, if the stack is full,
+// "f" has no STARTSEQ selector, or the handler returns NULL.
+bool upb_sink_startseq(upb_sink *s, const upb_fielddef *f) {
+  assert(upb_fielddef_isseq(f));
+  if (!chkstack(s)) return false;
+
+  const upb_handlers *handlers = s->top->h;
+  // Without a handler the sequence inherits the parent's closure.
+  void *subclosure = s->top->closure;
+  upb_selector_t start_sel;
+  if (!upb_getselector(f, UPB_HANDLER_STARTSEQ, &start_sel)) return false;
+
+  upb_startfield_handler *on_start =
+      (upb_startfield_handler*)upb_handlers_gethandler(handlers, start_sel);
+  if (on_start != NULL) {
+    void *data = upb_handlers_gethandlerdata(handlers, start_sel);
+    subclosure = on_start(s->top->closure, data);
+    if (subclosure == NULL) return false;
+  }
+
+  // Push the new frame, remembering which selector must close it.
+  upb_sink_frame *frame = ++s->top;
+  frame->end = getselector(f, UPB_HANDLER_ENDSEQ);
+  frame->h = handlers;
+  frame->closure = subclosure;
+  return true;
+}
+
+// Ends the sequence opened by upb_sink_startseq(): pops its frame and runs
+// the ENDSEQ handler (if any) on the parent frame.  Returns the handler's
+// result, or true when there is no handler.
+bool upb_sink_endseq(upb_sink *s, const upb_fielddef *f) {
+  const upb_selector_t end_sel = s->top->end;
+  assert(end_sel == getselector(f, UPB_HANDLER_ENDSEQ));
+
+  upb_sink_frame *parent = --s->top;
+  upb_endfield_handler *on_end =
+      (upb_endfield_handler*)upb_handlers_gethandler(parent->h, end_sel);
+  if (!on_end) return true;
+  return on_end(parent->closure,
+                upb_handlers_gethandlerdata(parent->h, end_sel));
+}
+
+// Begins a string for field "f": runs the STARTSTR handler (if any), passing
+// it "size_hint", to obtain the closure for the string, then pushes a frame
+// that keeps the current handlers.  Returns false, pushing nothing, if the
+// stack is full, "f" has no STARTSTR selector, or the handler returns NULL.
+bool upb_sink_startstr(upb_sink *s, const upb_fielddef *f, size_t size_hint) {
+  assert(upb_fielddef_isstring(f));
+  if (!chkstack(s)) return false;
+
+  const upb_handlers *handlers = s->top->h;
+  // Without a handler the string inherits the parent's closure.
+  void *subclosure = s->top->closure;
+  upb_selector_t start_sel;
+  if (!upb_getselector(f, UPB_HANDLER_STARTSTR, &start_sel)) return false;
+
+  upb_startstr_handler *on_start =
+      (upb_startstr_handler*)upb_handlers_gethandler(handlers, start_sel);
+  if (on_start != NULL) {
+    void *data = upb_handlers_gethandlerdata(handlers, start_sel);
+    subclosure = on_start(s->top->closure, data, size_hint);
+    if (subclosure == NULL) return false;
+  }
+
+  // Push the new frame, remembering which selector must close it.
+  upb_sink_frame *frame = ++s->top;
+  frame->end = getselector(f, UPB_HANDLER_ENDSTR);
+  frame->h = handlers;
+  frame->closure = subclosure;
+  return true;
+}
+
+// Ends the string opened by upb_sink_startstr(): pops its frame and runs the
+// ENDSTR handler (if any) on the parent frame.  Returns the handler's result,
+// or true when there is no handler.
+bool upb_sink_endstr(upb_sink *s, const upb_fielddef *f) {
+  const upb_selector_t end_sel = s->top->end;
+  assert(end_sel == getselector(f, UPB_HANDLER_ENDSTR));
+
+  upb_sink_frame *parent = --s->top;
+  upb_endfield_handler *on_end =
+      (upb_endfield_handler*)upb_handlers_gethandler(parent->h, end_sel);
+  if (!on_end) return true;
+  return on_end(parent->closure,
+                upb_handlers_gethandlerdata(parent->h, end_sel));
+}
+
+// Begins a submessage for field "f": runs the STARTSUBMSG handler (if any) to
+// obtain the submessage closure, pushes a frame that uses the sub-handlers
+// registered for "f", and then starts the submessage.  Returns false, pushing
+// nothing, if the stack is full, "f" has no STARTSUBMSG selector, or the
+// handler returns NULL.
+bool upb_sink_startsubmsg(upb_sink *s, const upb_fielddef *f) {
+  assert(upb_fielddef_issubmsg(f));
+  if (!chkstack(s)) return false;
+
+  const upb_handlers *h = s->top->h;
+  upb_selector_t selector;
+  if (!upb_getselector(f, UPB_HANDLER_STARTSUBMSG, &selector)) return false;
+  upb_startfield_handler *startsubmsg =
+      (upb_startfield_handler*)upb_handlers_gethandler(h, selector);
+  // Without a handler the submessage inherits the parent's closure.
+  void *subc = s->top->closure;
+
+  if (startsubmsg) {
+    void *data = upb_handlers_gethandlerdata(h, selector);
+    subc = startsubmsg(s->top->closure, data);
+    if (!subc) return false;
+  }
+
+  ++s->top;
+  s->top->end = getselector(f, UPB_HANDLER_ENDSUBMSG);
+  s->top->h = upb_handlers_getsubhandlers(h, f);
+  s->top->closure = subc;
+  // NOTE(review): the result of upb_sink_startmsg() is ignored, so a startmsg
+  // handler returning false does not stop processing here -- confirm this is
+  // intended.
+  upb_sink_startmsg(s);
+  return true;
+}
+
+// Ends the submessage opened by upb_sink_startsubmsg(): runs the
+// submessage's endmsg handler (if any) with the sink's status, pops the
+// frame, then runs the ENDSUBMSG handler (if any) on the parent frame.
+// Returns that handler's result, or true when there is no handler.
+bool upb_sink_endsubmsg(upb_sink *s, const upb_fielddef *f) {
+  const upb_selector_t end_sel = s->top->end;
+  assert(end_sel == getselector(f, UPB_HANDLER_ENDSUBMSG));
+
+  // Finish the inner message while its frame is still on top.
+  upb_sink_frame *inner = s->top;
+  upb_endmsg_handler *on_endmsg = upb_handlers_getendmsg(inner->h);
+  if (on_endmsg) on_endmsg(inner->closure, &s->status);
+
+  upb_sink_frame *parent = --s->top;
+  upb_endfield_handler *on_endfield =
+      (upb_endfield_handler*)upb_handlers_gethandler(parent->h, end_sel);
+  if (!on_endfield) return true;
+  return on_endfield(parent->closure,
+                     upb_handlers_gethandlerdata(parent->h, end_sel));
+}
+
+// Returns the handlers bound to the frame currently on top of the stack.
+const upb_handlers *upb_sink_tophandlers(upb_sink *s) {
+  const upb_sink_frame *frame = s->top;
+  return frame->h;
+}
--- a/upb/sink.h
+++ b/upb/sink.h
@ -0,0 +1,82 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2010-2012 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * A upb_sink is an object that binds a upb_handlers object to some runtime
+ * state.  It is the object that can actually receive data via the upb_handlers
+ * interface.
+ *
+ * Unlike upb_def and upb_handlers, upb_sink is never frozen, immutable, or
+ * thread-safe.  You can create as many of them as you want, but each one may
+ * only be used in a single thread at a time.
+ *
+ * If we compare with class-based OOP, you can think of a upb_def as an
+ * abstract base class, a upb_handlers as a concrete derived class, and a
+ * upb_sink as an object (class instance).
+ */
+
+#ifndef UPB_SINK_H
+#define UPB_SINK_H
+
+#include "upb/handlers.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* upb_sink *******************************************************************/
+
+// One entry of the sink's nesting stack.
+typedef struct {
+  upb_selector_t end;  // From the enclosing message (unused at top-level).
+  const upb_handlers *h;  // Handlers that receive this frame's events.
+  void *closure;  // Passed as the first argument to this frame's handlers.
+} upb_sink_frame;
+
+// Runtime state that binds a upb_handlers object to a stack of closures.
+typedef struct {
+  // "top" is the current frame (NULL between upb_sink_init() and the first
+  // upb_sink_reset()); "limit" points one past the end of "stack".
+  upb_sink_frame *top, *limit;
+  upb_sink_frame stack[UPB_MAX_NESTING];
+  // Handed to endmsg handlers; also set when nesting gets too deep.
+  upb_status status;
+} upb_sink;
+
+// Caller retains ownership of the handlers object.
+void upb_sink_init(upb_sink *s, const upb_handlers *h);
+
+// Resets the state of the sink so that it is ready to accept new input.
+// Any state from previously received data is discarded.  "Closure" will be
+// used as the top-level closure.
+void upb_sink_reset(upb_sink *s, void *closure);
+
+void upb_sink_uninit(upb_sink *s);
+
+// Returns the handlers at the top of the stack.
+const upb_handlers *upb_sink_tophandlers(upb_sink *s);
+
+// Functions for pushing data into the sink.
+// These return false if processing should stop (either due to error or just
+// to suspend).
+bool upb_sink_startmsg(upb_sink *s);
+void upb_sink_endmsg(upb_sink *s, upb_status *status);
+bool upb_sink_putint32(upb_sink *s, const upb_fielddef *f, int32_t val);
+bool upb_sink_putint64(upb_sink *s, const upb_fielddef *f, int64_t val);
+bool upb_sink_putuint32(upb_sink *s, const upb_fielddef *f, uint32_t val);
+bool upb_sink_putuint64(upb_sink *s, const upb_fielddef *f, uint64_t val);
+bool upb_sink_putfloat(upb_sink *s, const upb_fielddef *f, float val);
+bool upb_sink_putdouble(upb_sink *s, const upb_fielddef *f, double val);
+bool upb_sink_putbool(upb_sink *s, const upb_fielddef *f, bool val);
+bool upb_sink_startstr(upb_sink *s, const upb_fielddef *f, size_t size_hint);
+size_t upb_sink_putstring(upb_sink *s, const upb_fielddef *f, const char *buf,
+                          size_t len);
+bool upb_sink_endstr(upb_sink *s, const upb_fielddef *f);
+bool upb_sink_startsubmsg(upb_sink *s, const upb_fielddef *f);
+bool upb_sink_endsubmsg(upb_sink *s, const upb_fielddef *f);
+bool upb_sink_startseq(upb_sink *s, const upb_fielddef *f);
+bool upb_sink_endseq(upb_sink *s, const upb_fielddef *f);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif
--- a/upb/stdc/error.c
+++ b/upb/stdc/error.c
@ -9,7 +9,6 @@

 #include "upb/stdc/error.h"

-#include <errno.h>
 #include <string.h>

 void upb_status_fromerrno(upb_status *status, int code) {
--- a/upb/stdc/io.c
+++ b/upb/stdc/io.c
@ -7,6 +7,9 @@

 #include "upb/stdc/io.h"

+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
 #include "upb/stdc/error.h"

 // We can make this configurable if necessary.
--- a/upb/symtab.c
+++ b/upb/symtab.c
@ -0,0 +1,326 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2008-2012 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include "upb/symtab.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "upb/bytestream.h"
+
+// Reports whether the symtab's refcounted base is frozen.
+bool upb_symtab_isfrozen(const upb_symtab *s) {
+  const upb_refcounted *base = upb_upcast(s);
+  return upb_refcounted_isfrozen(base);
+}
+
+// Takes a ref on the symtab on behalf of "owner".
+void upb_symtab_ref(const upb_symtab *s, const void *owner) {
+  const upb_refcounted *base = upb_upcast(s);
+  upb_refcounted_ref(base, owner);
+}
+
+// Releases the ref on the symtab held by "owner".
+void upb_symtab_unref(const upb_symtab *s, const void *owner) {
+  const upb_refcounted *base = upb_upcast(s);
+  upb_refcounted_unref(base, owner);
+}
+
+// Transfers a ref on the symtab from owner "from" to owner "to".
+void upb_symtab_donateref(
+    const upb_symtab *s, const void *from, const void *to) {
+  const upb_refcounted *base = upb_upcast(s);
+  upb_refcounted_donateref(base, from, to);
+}
+
+// Verifies that "owner" holds a ref on the symtab.
+void upb_symtab_checkref(const upb_symtab *s, const void *owner) {
+  const upb_refcounted *base = upb_upcast(s);
+  upb_refcounted_checkref(base, owner);
+}
+
+// vtbl "free" function for symtabs: releases the ref the symtab holds on
+// every def in its table, tears the table down, and frees the symtab itself.
+static void upb_symtab_free(upb_refcounted *r) {
+  upb_symtab *symtab = (upb_symtab*)r;
+  upb_strtable_iter it;
+  upb_strtable_begin(&it, &symtab->symtab);
+  while (!upb_strtable_done(&it)) {
+    const upb_def *def = upb_value_getptr(upb_strtable_iter_value(&it));
+    upb_def_unref(def, symtab);
+    upb_strtable_next(&it);
+  }
+  upb_strtable_uninit(&symtab->symtab);
+  free(symtab);
+}
+
+// Refcounted vtbl for symtabs: no "visit" function is supplied (NULL), and
+// destruction goes through upb_symtab_free().
+static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_symtab_free};
+
+// Creates an empty symbol table with a single ref owned by "owner".  Returns
+// NULL, leaking nothing, if any allocation fails.
+upb_symtab *upb_symtab_new(const void *owner) {
+  upb_symtab *s = malloc(sizeof(*s));
+  if (!s) return NULL;
+
+  // Set up the table before the refcount so that a failure in either step can
+  // be unwound without going through upb_symtab_free().
+  if (!upb_strtable_init(&s->symtab, UPB_CTYPE_PTR)) {
+    free(s);
+    return NULL;
+  }
+  if (!upb_refcounted_init(upb_upcast(s), &vtbl, owner)) {
+    upb_strtable_uninit(&s->symtab);
+    free(s);
+    return NULL;
+  }
+  return s;
+}
+
+// Returns a malloc()'d array holding every def in "s" whose type is "type"
+// (UPB_DEF_ANY matches all defs) and stores the number of entries in *n.  If
+// "owner" is non-NULL, a ref owned by "owner" is taken on each returned def.
+// Returns NULL with *n == 0 if the array cannot be allocated.
+const upb_def **upb_symtab_getdefs(const upb_symtab *s, upb_deftype_t type,
+                                   const void *owner, int *n) {
+  int total = upb_strtable_count(&s->symtab);
+  // We may only use part of this, depending on how many symbols are of the
+  // correct type.
+  const upb_def **defs = malloc(sizeof(*defs) * total);
+  if (defs == NULL) {
+    // Out of memory (or an empty table where malloc(0) yields NULL): never
+    // write through the NULL array.
+    *n = 0;
+    return NULL;
+  }
+  upb_strtable_iter iter;
+  upb_strtable_begin(&iter, &s->symtab);
+  int i = 0;
+  for(; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
+    upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
+    assert(def);
+    if(type == UPB_DEF_ANY || def->type == type)
+      defs[i++] = def;
+  }
+  *n = i;
+  if (owner)
+    for(i = 0; i < *n; i++) upb_def_ref(defs[i], owner);
+  return defs;
+}
+
+// Looks up the def stored under the exact name "sym".  On a hit, a ref owned
+// by "owner" is taken on the def before it is returned; on a miss NULL is
+// returned.
+const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym,
+                                 const void *owner) {
+  const upb_value *entry = upb_strtable_lookup(&s->symtab, sym);
+  if (!entry) return NULL;
+
+  upb_def *found = upb_value_getptr(*entry);
+  if (found) {
+    upb_def_ref(found, owner);
+  }
+  return found;
+}
+
+// Like upb_symtab_lookup(), but only succeeds if the def stored under "sym"
+// is a message def.  On success a ref owned by "owner" is taken; otherwise
+// NULL is returned and no ref is taken.
+const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym,
+                                       const void *owner) {
+  const upb_value *entry = upb_strtable_lookup(&s->symtab, sym);
+  upb_def *def = entry ? upb_value_getptr(*entry) : NULL;
+  if (!def || def->type != UPB_DEF_MSG) return NULL;
+
+  upb_msgdef *msg = upb_downcast_msgdef_mutable(def);
+  upb_def_ref(def, owner);
+  return msg;
+}
+
+// Resolves "sym", as written inside the definition named "base", to a def in
+// table "t".  Returns NULL for an empty symbol or an unknown name.  Only
+// absolute symbols (those starting with UPB_SYMBOL_SEPARATOR) are supported.
+static upb_def *upb_resolvename(const upb_strtable *t,
+                                const char *base, const char *sym) {
+  if (sym[0] == '\0') return NULL;
+
+  if (sym[0] != UPB_SYMBOL_SEPARATOR) {
+    // Remove components from base until we find an entry or run out.
+    // TODO: This branch is totally broken, but currently not used.
+    (void)base;
+    assert(false);
+    return NULL;
+  }
+
+  // Symbols starting with '.' are absolute, so we do a single lookup,
+  // skipping the leading '.'.
+  const upb_value *entry = upb_strtable_lookup(t, sym + 1);
+  return entry ? upb_value_getptr(*entry) : NULL;
+}
+
+// Resolves "sym" relative to "base" within the symtab.  On success a ref
+// owned by "owner" is taken on the def before it is returned; NULL is
+// returned if the name cannot be resolved.
+const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
+                                  const char *sym, const void *owner) {
+  upb_def *resolved = upb_resolvename(&s->symtab, base, sym);
+  if (resolved == NULL) return NULL;
+  upb_def_ref(resolved, owner);
+  return resolved;
+}
+
+// Searches def and its children to find defs that have the same name as any
+// def in "addtab."  Returns true if any were found, and as a side-effect adds
+// duplicates of these defs into addtab (the dups are owned by "new_owner").
+// On out-of-memory, an error is recorded in "s" and false is returned.
+//
+// We use a modified depth-first traversal that traverses each SCC (which we
+// already computed) as if it were a single node.  This allows us to traverse
+// the possibly-cyclic graph as if it were a DAG and to dup the correct set of
+// nodes with O(n) time.
+static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab,
+                            const void *new_owner, upb_inttable *seen,
+                            upb_status *s) {
+  // Memoize results of this function for efficiency (since we're traversing a
+  // DAG this is not needed to limit the depth of the search).
+  const upb_value *v = upb_inttable_lookup(seen, (uintptr_t)def);
+  if (v) return upb_value_getbool(*v);
+
+  // Visit submessages for all messages in the SCC.
+  bool need_dup = false;
+  const upb_def *base = def;
+  do {
+    assert(upb_def_isfrozen(def));
+    if (def->type == UPB_DEF_FIELD) continue;
+    const upb_value *v = upb_strtable_lookup(addtab, upb_def_fullname(def));
+    if (v) {
+      // Because we memoize we should not visit a node after we have dup'd it.
+      assert(((upb_def*)upb_value_getptr(*v))->came_from_user);
+      need_dup = true;
+    }
+    const upb_msgdef *m = upb_dyncast_msgdef(def);
+    if (m) {
+      upb_msg_iter i;
+      for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
+        upb_fielddef *f = upb_msg_iter_field(&i);
+        if (!upb_fielddef_hassubdef(f)) continue;
+        const upb_def *subdef = upb_fielddef_subdef(f);
+        // Subdefs in this same SCC are covered by the enclosing do/while,
+        // which walks every member of the group.  Recursing into them would
+        // never terminate for cyclic messages, because this SCC's memo entry
+        // is only inserted once the whole SCC has been processed.
+        if (subdef->base.group == def->base.group) continue;
+        // |= to avoid short-circuit; we need its side-effects.
+        need_dup |= upb_resolve_dfs(subdef, addtab, new_owner, seen, s);
+        if (!upb_ok(s)) return false;
+      }
+    }
+  } while ((def = (upb_def*)def->base.next) != base);
+
+  if (need_dup) {
+    // Dup any defs that don't already have entries in addtab.
+    def = base;
+    do {
+      if (def->type == UPB_DEF_FIELD) continue;
+      const char *name = upb_def_fullname(def);
+      if (upb_strtable_lookup(addtab, name) == NULL) {
+        upb_def *newdef = upb_def_dup(def, new_owner);
+        if (!newdef) goto oom;
+        newdef->came_from_user = false;
+        if (!upb_strtable_insert(addtab, name, upb_value_ptr(newdef)))
+          goto oom;
+      }
+    } while ((def = (upb_def*)def->base.next) != base);
+  }
+
+  // Both loops above end with def == base, so the result is memoized under
+  // the def this call was entered with.
+  upb_inttable_insert(seen, (uintptr_t)def, upb_value_bool(need_dup));
+  return need_dup;
+
+oom:
+  upb_status_seterrliteral(s, "out of memory");
+  return false;
+}
+
+// Adds the "n" mutable defs in "defs" to the symtab, replacing any existing
+// defs of the same name.  The caller's refs (owned by "ref_donor") are donated
+// to the symtab.  Existing defs that can reach a replaced def are dup'd so the
+// table stays self-consistent, symbolic references are resolved, and the whole
+// set is frozen before being installed.
+//
+// Returns true on success.  On failure "status" describes the error, false is
+// returned, refs already taken from the caller are donated back to
+// "ref_donor", and defs dup'd along the way are released.
+bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
+                    upb_status *status) {
+  upb_def **add_defs = NULL;
+  upb_strtable addtab;
+  if (!upb_strtable_init(&addtab, UPB_CTYPE_PTR)) {
+    upb_status_seterrliteral(status, "out of memory");
+    return false;
+  }
+
+  // Add new defs to table.
+  for (int i = 0; i < n; i++) {
+    upb_def *def = defs[i];
+    if (upb_def_isfrozen(def)) {
+      upb_status_seterrliteral(status, "added defs must be mutable");
+      goto err;
+    }
+    assert(!upb_def_isfrozen(def));
+    const char *fullname = upb_def_fullname(def);
+    if (!fullname) {
+      upb_status_seterrliteral(
+          status, "Anonymous defs cannot be added to a symtab");
+      goto err;
+    }
+    if (upb_strtable_lookup(&addtab, fullname) != NULL) {
+      upb_status_seterrf(status, "Conflicting defs named '%s'", fullname);
+      goto err;
+    }
+    // Insert before taking the caller's ref: the error path only returns refs
+    // for defs it finds in addtab, so a def whose insert failed must still be
+    // owned by the caller.
+    if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def)))
+      goto oom_err;
+    // We need this to back out properly, because if there is a failure we need
+    // to donate the ref back to the caller.
+    def->came_from_user = true;
+    upb_def_donateref(def, ref_donor, s);
+  }
+
+  // Add dups of any existing def that can reach a def with the same name as
+  // one of "defs."
+  upb_inttable seen;
+  if (!upb_inttable_init(&seen, UPB_CTYPE_BOOL)) goto oom_err;
+  upb_strtable_iter i;
+  upb_strtable_begin(&i, &s->symtab);
+  for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+    upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i));
+    upb_resolve_dfs(def, &addtab, s, &seen, status);
+    if (!upb_ok(status)) {
+      // The shared error path knows nothing about "seen"; release it here.
+      upb_inttable_uninit(&seen);
+      goto err;
+    }
+  }
+  upb_inttable_uninit(&seen);
+
+  // Now using the table, resolve symbolic references.
+  upb_strtable_begin(&i, &addtab);
+  for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+    upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i));
+    upb_msgdef *m = upb_dyncast_msgdef_mutable(def);
+    if (!m) continue;
+    // Type names are resolved relative to the message in which they appear.
+    const char *base = upb_def_fullname(upb_upcast(m));
+
+    upb_msg_iter j;
+    for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) {
+      upb_fielddef *f = upb_msg_iter_field(&j);
+      const char *name = upb_fielddef_subdefname(f);
+      if (name) {
+        upb_def *subdef = upb_resolvename(&addtab, base, name);
+        if (subdef == NULL) {
+          upb_status_seterrf(
+              status, "couldn't resolve name '%s' in message '%s'", name, base);
+          goto err;
+        } else if (!upb_fielddef_setsubdef(f, subdef)) {
+          upb_status_seterrf(
+              status, "def '%s' had the wrong type for field '%s'",
+              upb_def_fullname(subdef), upb_fielddef_name(f));
+          goto err;
+        }
+      }
+
+      if (!upb_fielddef_resolvedefault(f)) {
+        upb_byteregion *r = upb_value_getbyteregion(upb_fielddef_default(f));
+        size_t len;
+        const char *ptr = upb_byteregion_getptr(r, 0, &len);
+        upb_status_seterrf(status, "couldn't resolve enum default '%s'", ptr);
+        goto err;
+      }
+    }
+  }
+
+  // We need an array of the defs in addtab, for passing to upb_def_freeze.
+  add_defs = malloc(sizeof(void*) * upb_strtable_count(&addtab));
+  if (add_defs == NULL) goto oom_err;
+  upb_strtable_begin(&i, &addtab);
+  for (n = 0; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+    add_defs[n++] = upb_value_getptr(upb_strtable_iter_value(&i));
+  }
+
+  if (!upb_def_freeze(add_defs, n, status)) goto err;
+
+  // This must be delayed until all errors have been detected, since error
+  // recovery code uses this table to cleanup defs.
+  upb_strtable_uninit(&addtab);
+
+  // TODO(haberman) we don't properly handle errors after this point (like
+  // OOM in upb_strtable_insert() below).
+  for (int i = 0; i < n; i++) {
+    upb_def *def = add_defs[i];
+    const char *name = upb_def_fullname(def);
+    upb_value v;
+    // Release the def (if any) that the new one replaces.
+    if (upb_strtable_remove(&s->symtab, name, &v)) {
+      const upb_def *def = upb_value_getptr(v);
+      upb_def_unref(def, s);
+    }
+    bool success = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def));
+    UPB_ASSERT_VAR(success, success == true);
+  }
+  free(add_defs);
+  return true;
+
+oom_err:
+  upb_status_seterrliteral(status, "out of memory");
+err: {
+    // For defs the user passed in, we need to donate the refs back.  For defs
+    // we dup'd, we need to just unref them.
+    upb_strtable_iter i;
+    upb_strtable_begin(&i, &addtab);
+    for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+      upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i));
+      if (def->came_from_user) {
+        upb_def_donateref(def, s, ref_donor);
+      } else {
+        upb_def_unref(def, s);
+      }
+      def->came_from_user = false;
+    }
+  }
+  upb_strtable_uninit(&addtab);
+  free(add_defs);
+  assert(!upb_ok(status));
+  return false;
+}
--- a/upb/symtab.h
+++ b/upb/symtab.h
@ -0,0 +1,200 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009-2012 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * A symtab (symbol table) stores a name->def map of upb_defs.  Clients could
+ * always create such tables themselves, but upb_symtab has logic for resolving
+ * symbolic references, and in particular, for keeping a whole set of consistent
+ * defs when replacing some subset of those defs.  This logic is nontrivial.
+ *
+ * This is a mixed C/C++ interface that offers a full API to both languages.
+ * See the top-level README for more information.
+ */
+
+#ifndef UPB_SYMTAB_H_
+#define UPB_SYMTAB_H_
+
+#ifdef __cplusplus
+#include <vector>
+
+namespace upb { class SymbolTable; }
+typedef upb::SymbolTable upb_symtab;
+#else
+struct upb_symtab;
+typedef struct upb_symtab upb_symtab;
+#endif
+
+#include "upb/def.h"
+
+#ifdef __cplusplus
+
+class upb::SymbolTable {
+ public:
+  // Returns a new symbol table with a single ref owned by "owner."
+  // Returns NULL if memory allocation failed.
+  static SymbolTable* New(const void* owner);
+
+  // Though not declared as such in C++, upb::RefCounted is the base of
+  // SymbolTable and we can upcast to it.
+  RefCounted* Upcast();
+  const RefCounted* Upcast() const;
+
+  // Functionality from upb::RefCounted.
+  bool IsFrozen() const;
+  void Ref(const void* owner) const;
+  void Unref(const void* owner) const;
+  void DonateRef(const void *from, const void *to) const;
+  void CheckRef(const void *owner) const;
+
+  // Resolves the given symbol using the rules described in descriptor.proto,
+  // namely:
+  //
+  //    If the name starts with a '.', it is fully-qualified.  Otherwise,
+  //    C++-like scoping rules are used to find the type (i.e. first the nested
+  //    types within this message are searched, then within the parent, on up
+  //    to the root namespace).
+  //
+  // If a def is found, the caller owns one ref on the returned def, owned by
+  // owner.  Otherwise returns NULL.
+  const Def* Resolve(const char* base, const char* sym,
+                     const void* owner) const;
+
+  // Finds an entry in the symbol table with this exact name.  If a def is
+  // found, the caller owns one ref on the returned def, owned by owner.
+  // Otherwise returns NULL.
+  const Def* Lookup(const char *sym, const void *owner) const;
+  const MessageDef* LookupMessage(const char *sym, const void *owner) const;
+
+  // Gets an array of pointers to all currently active defs in this symtab.
+  // The caller owns the returned array (which is of length *n) as well as a
+  // ref to each symbol inside (owned by owner).  If type is UPB_DEF_ANY then
+  // defs of all types are returned, otherwise only defs of the required type
+  // are returned.
+  const Def** GetDefs(upb_deftype_t type, const void *owner, int *n) const;
+
+  // Adds the given mutable defs to the symtab, resolving all symbols
+  // (including enum default values) and finalizing the defs.  Only one def per
+  // name may be in the list, but defs can replace existing defs in the symtab.
+  // All defs must have a name -- anonymous defs are not allowed.  Anonymous
+  // defs can still be frozen by calling upb_def_freeze() directly.
+  //
+  // Any existing defs that can reach defs that are being replaced will
+  // themselves be replaced also, so that the resulting set of defs is fully
+  // consistent.
+  //
+  // The logic implemented in this method is a convenience; ultimately it
+  // calls some combination of upb_fielddef_setsubdef(), upb_def_dup(), and
+  // upb_def_freeze(), any of which the client could call themselves.  However,
+  // since the logic for doing so is nontrivial, we provide it here.
+  //
+  // The entire operation either succeeds or fails.  If the operation fails,
+  // the symtab is unchanged, false is returned, and status indicates the
+  // error.  The caller passes a ref on all defs to the symtab (even if the
+  // operation fails).
+  //
+  // TODO(haberman): currently failure will leave the symtab unchanged, but may
+  // leave the defs themselves partially resolved.  Does this matter?  If so we
+  // could do a prepass that ensures that all symbols are resolvable and bail
+  // if not, so we don't mutate anything until we know the operation will
+  // succeed.
+  //
+  // TODO(haberman): since the defs must be mutable, refining a frozen def
+  // requires making mutable copies of the entire tree.  This is wasteful if
+  // only a few messages are changing.  We may want to add a way of adding a
+  // tree of frozen defs to the symtab (perhaps an alternate constructor where
+  // you pass the root of the tree?)
+  bool Add(Def*const* defs, int n, void* ref_donor, upb_status* status);
+
+  // Convenience overload of Add() that takes the defs as a std::vector.  The
+  // vector's contents are forwarded to the pointer/length overload above, with
+  // "owner" passed as that overload's ref_donor.
+  bool Add(const std::vector<Def*>& defs, void *owner, Status* status) {
+    // Indexing element 0 of an empty vector is undefined behavior, so pass a
+    // NULL array with a length of zero in that case.
+    Def*const* array = defs.empty() ? NULL : &defs[0];
+    return Add(array, static_cast<int>(defs.size()), owner, status);
+  }
+
+ private:
+  UPB_DISALLOW_POD_OPS(SymbolTable);
+
+#else
+struct upb_symtab {
+#endif
+  upb_refcounted base;
+  upb_strtable symtab;
+};
+
+// Native C API.
+#ifdef __cplusplus
+extern "C" {
+#endif
+// From upb_refcounted.
+bool upb_symtab_isfrozen(const upb_symtab *s);
+void upb_symtab_ref(const upb_symtab *s, const void *owner);
+void upb_symtab_unref(const upb_symtab *s, const void *owner);
+void upb_symtab_donateref(
+    const upb_symtab *s, const void *from, const void *to);
+void upb_symtab_checkref(const upb_symtab *s, const void *owner);
+
+upb_symtab *upb_symtab_new(const void *owner);
+const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
+                                  const char *sym, const void *owner);
+const upb_def *upb_symtab_lookup(
+    const upb_symtab *s, const char *sym, const void *owner);
+const upb_msgdef *upb_symtab_lookupmsg(
+    const upb_symtab *s, const char *sym, const void *owner);
+const upb_def **upb_symtab_getdefs(
+    const upb_symtab *s, upb_deftype_t type, const void *owner, int *n);
+bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
+                    upb_status *status);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+
+// C++ inline wrappers.
+// Each SymbolTable member function below is a thin inline forward to the
+// corresponding upb_symtab_*() C function declared above, passing "this" as
+// the symtab argument.
+namespace upb {
+inline SymbolTable* SymbolTable::New(const void* owner) {
+  return upb_symtab_new(owner);
+}
+
+// Upcasts and refcounting (see "Functionality from upb::RefCounted" in the
+// class declaration).
+inline RefCounted* SymbolTable::Upcast() { return upb_upcast(this); }
+inline const RefCounted* SymbolTable::Upcast() const {
+  return upb_upcast(this);
+}
+inline bool SymbolTable::IsFrozen() const {
+  return upb_symtab_isfrozen(this);
+}
+inline void SymbolTable::Ref(const void *owner) const {
+  upb_symtab_ref(this, owner);
+}
+inline void SymbolTable::Unref(const void *owner) const {
+  upb_symtab_unref(this, owner);
+}
+inline void SymbolTable::DonateRef(const void *from, const void *to) const {
+  upb_symtab_donateref(this, from, to);
+}
+inline void SymbolTable::CheckRef(const void *owner) const {
+  upb_symtab_checkref(this, owner);
+}
+
+// Symbol lookup and enumeration.
+inline const Def* SymbolTable::Resolve(
+    const char* base, const char* sym, const void* owner) const {
+  return upb_symtab_resolve(this, base, sym, owner);
+}
+inline const Def* SymbolTable::Lookup(
+    const char *sym, const void *owner) const {
+  return upb_symtab_lookup(this, sym, owner);
+}
+inline const MessageDef* SymbolTable::LookupMessage(
+    const char *sym, const void *owner) const {
+  return upb_symtab_lookupmsg(this, sym, owner);
+}
+inline const Def** SymbolTable::GetDefs(
+    upb_deftype_t type, const void *owner, int *n) const {
+  return upb_symtab_getdefs(this, type, owner, n);
+}
+// Adding defs.  The Def pointers are cast to upb_def pointers for the C API.
+inline bool SymbolTable::Add(
+    Def*const* defs, int n, void* ref_donor, upb_status* status) {
+  return upb_symtab_add(this, (upb_def*const*)defs, n, ref_donor, status);
+}
+}  // namespace upb
+#endif
+
+#endif  /* UPB_SYMTAB_H_ */
--- a/upb/table.c
+++ b/upb/table.c
@ -5,14 +5,10 @@
 * Author: Josh Haberman <jhaberman@gmail.com>
 *
 * Implementation is heavily inspired by Lua's ltable.c.
- *
- * TODO: for table iteration we use (array - 1) in several places; is this
- * undefined behavior?  If so find a better solution.
 */

 #include "upb/table.h"

-#include <assert.h>
 #include <stdlib.h>
 #include <string.h>

@ -35,47 +31,56 @@ int upb_log2(uint64_t v) {
  return UPB_MIN(UPB_MAXARRSIZE, ret);
 }

+// Returns a newly malloc()'d copy of the NUL-terminated string "s", or NULL
+// if the allocation fails.  The caller owns the result and releases it with
+// free().
+char *upb_strdup(const char *s) {
+  const size_t bytes = strlen(s) + 1;  // Include the terminating NUL.
+  char *copy = malloc(bytes);
+  if (copy == NULL) return NULL;
+  memcpy(copy, s, bytes);
+  return copy;
+}
+
 static upb_tabkey upb_strkey(const char *str) {
  upb_tabkey k;
  k.str = (char*)str;
  return k;
 }

-static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed);
-typedef upb_tabent *upb_hashfunc_t(const upb_table *t, upb_tabkey key);
+typedef const upb_tabent *upb_hashfunc_t(const upb_table *t, upb_tabkey key);
 typedef bool upb_eqlfunc_t(upb_tabkey k1, upb_tabkey k2);

 /* Base table (shared code) ***************************************************/

-static size_t upb_table_size(const upb_table *t) { return 1 << t->size_lg2; }
-
 static bool upb_table_isfull(upb_table *t) {
  return (double)(t->count + 1) / upb_table_size(t) > MAX_LOAD;
 }

-static bool upb_table_init(upb_table *t, uint8_t size_lg2) {
+static bool upb_table_init(upb_table *t, upb_ctype_t type, uint8_t size_lg2) {
  t->count = 0;
+  t->type = type;
  t->size_lg2 = size_lg2;
+  t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
  size_t bytes = upb_table_size(t) * sizeof(upb_tabent);
-  t->mask = upb_table_size(t) - 1;
-  t->entries = malloc(bytes);
-  if (!t->entries) return false;
-  memset(t->entries, 0, bytes);
+  if (bytes > 0) {
+    t->entries = malloc(bytes);
+    if (!t->entries) return false;
+    memset((void*)t->entries, 0, bytes);
+  } else {
+    t->entries = NULL;
+  }
  return true;
 }

-static void upb_table_uninit(upb_table *t) { free(t->entries); }
-
-static bool upb_tabent_isempty(const upb_tabent *e) { return e->key.num == 0; }
+static void upb_table_uninit(upb_table *t) { free((void*)t->entries); }

-static upb_tabent *upb_table_emptyent(const upb_table *t) {
-  upb_tabent *e = t->entries + upb_table_size(t);
+static upb_tabent *upb_table_emptyent(upb_table *t) {
+  upb_tabent *e = (upb_tabent*)t->entries + upb_table_size(t);
  while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); }
 }

-static upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key,
-                                   upb_hashfunc_t *hash, upb_eqlfunc_t *eql) {
-  upb_tabent *e = hash(t, key);
+static const upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key,
+                                         upb_hashfunc_t *hash,
+                                         upb_eqlfunc_t *eql) {
+  if (t->size_lg2 == 0) return NULL;
+  const upb_tabent *e = hash(t, key);
  if (upb_tabent_isempty(e)) return NULL;
  while (1) {
    if (eql(e->key, key)) return &e->val;
@ -86,14 +91,19 @@ static upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key,
 // The given key must not already exist in the table.
 static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val,
                             upb_hashfunc_t *hash, upb_eqlfunc_t *eql) {
-  (void)eql;
  assert(upb_table_lookup(t, key, hash, eql) == NULL);
+  assert(val.type == t->type);
  t->count++;
-  upb_tabent *mainpos_e = hash(t, key);
+  upb_tabent *mainpos_e = (upb_tabent*)hash(t, key);
  upb_tabent *our_e = mainpos_e;
-  if (!upb_tabent_isempty(mainpos_e)) {  // Collision.
+  if (upb_tabent_isempty(mainpos_e)) {
+    // Our main position is empty; use it.
+    our_e->next = NULL;
+  } else {
+    // Collision.
    upb_tabent *new_e = upb_table_emptyent(t);
-    upb_tabent *chain = hash(t, mainpos_e->key);  // Head of collider's chain.
+    // Head of collider's chain.
+    upb_tabent *chain = (upb_tabent*)hash(t, mainpos_e->key);
    if (chain == mainpos_e) {
      // Existing ent is in its main posisiton (it has the same hash as us, and
      // is the head of our chain).  Insert to new ent and append to this chain.
@ -105,7 +115,10 @@ static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val,
      // chain).  This implies that no existing ent in the table has our hash.
      // Evict it (updating its chain) and use its ent for head of our chain.
      *new_e = *mainpos_e;  // copies next.
-      while (chain->next != mainpos_e) chain = chain->next;
+      while (chain->next != mainpos_e) {
+        chain = (upb_tabent*)chain->next;
+        assert(chain);
+      }
      chain->next = new_e;
      our_e = mainpos_e;
      our_e->next = NULL;
@ -117,27 +130,35 @@ static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val,
 }

 static bool upb_table_remove(upb_table *t, upb_tabkey key, upb_value *val,
+                             upb_tabkey *removed,
                             upb_hashfunc_t *hash, upb_eqlfunc_t *eql) {
-  upb_tabent *chain = hash(t, key);
+  // Removes the entry matching "key", if present.  On success returns true,
+  // stores the removed value in *val (when val is non-NULL) and stores the
+  // removed entry's own key in *removed, so that callers that own key storage
+  // (such as string tables) can release it.
+  //
+  // A table with no hash part has no entries to dereference; this mirrors the
+  // guard in upb_table_lookup().
+  if (t->size_lg2 == 0) return false;
+  upb_tabent *chain = (upb_tabent*)hash(t, key);
+  if (upb_tabent_isempty(chain)) return false;
   if (eql(chain->key, key)) {
+    // Element to remove is at the head of its chain.
     t->count--;
     if (val) *val = chain->val;
+    // Record the removed key *before* the head slot is overwritten below.
+    // Reading it afterwards would report the key of the entry that was moved
+    // into the head slot, which is still live in the table.
+    *removed = chain->key;
     if (chain->next) {
-      upb_tabent *move = chain->next;
+      upb_tabent *move = (upb_tabent*)chain->next;
       *chain = *move;
       move->key.num = 0;  // Make the slot empty.
     } else {
       chain->key.num = 0;  // Make the slot empty.
     }
     return true;
   } else {
+    // Element to remove is either in a non-head position or not in the table.
     while (chain->next && !eql(chain->next->key, key))
-      chain = chain->next;
+      chain = (upb_tabent*)chain->next;
     if (chain->next) {
       // Found element to remove.
       if (val) *val = chain->next->val;
-      chain->next->key.num = 0;
-      chain->next = chain->next->next;
+      upb_tabent *remove = (upb_tabent*)chain->next;
+      *removed = remove->key;
+      remove->key.num = 0;
+      chain->next = remove->next;
       t->count--;
       return true;
     } else {
@ -146,13 +167,16 @@ static bool upb_table_remove(upb_table *t, upb_tabkey key, upb_value *val,
  }
 }

-static upb_tabent *upb_table_next(const upb_table *t, upb_tabent *e) {
-  upb_tabent *end = t->entries + upb_table_size(t);
+static const upb_tabent *upb_table_next(const upb_table *t,
+                                        const upb_tabent *e) {
+  const upb_tabent *end = t->entries + upb_table_size(t);
  do { if (++e == end) return NULL; } while(e->key.num == 0);
  return e;
 }

-static upb_tabent *upb_table_begin(const upb_table *t) {
+// TODO: is calculating t->entries - 1 undefined behavior?  If so find a better
+// solution.
+static const upb_tabent *upb_table_begin(const upb_table *t) {
  return upb_table_next(t, t->entries - 1);
 }

@ -161,7 +185,7 @@ static upb_tabent *upb_table_begin(const upb_table *t) {

 // A simple "subclass" of upb_table that only adds a hash function for strings.

-static upb_tabent *upb_strhash(const upb_table *t, upb_tabkey key) {
+static const upb_tabent *upb_strhash(const upb_table *t, upb_tabkey key) {
  // Could avoid the strlen() by using a hash function that terminates on NULL.
  return t->entries + (MurmurHash2(key.str, strlen(key.str), 0) & t->mask);
 }
@ -170,11 +194,13 @@ static bool upb_streql(upb_tabkey k1, upb_tabkey k2) {
  return strcmp(k1.str, k2.str) == 0;
 }

-bool upb_strtable_init(upb_strtable *t) { return upb_table_init(&t->t, 4); }
+bool upb_strtable_init(upb_strtable *t, upb_ctype_t type) {
+  return upb_table_init(&t->t, type, 2);
+}

 void upb_strtable_uninit(upb_strtable *t) {
  for (size_t i = 0; i < upb_table_size(&t->t); i++)
-    free(t->t.entries[i].key.str);
+    free((void*)t->t.entries[i].key.str);
  upb_table_uninit(&t->t);
 }

@ -182,7 +208,8 @@ bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) {
  if (upb_table_isfull(&t->t)) {
    // Need to resize.  New table of double the size, add old elements to it.
    upb_strtable new_table;
-    if (!upb_table_init(&new_table.t, t->t.size_lg2 + 1)) return false;
+    if (!upb_table_init(&new_table.t, t->t.type, t->t.size_lg2 + 1))
+      return false;
    upb_strtable_iter i;
    upb_strtable_begin(&i, t);
    for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
@ -192,15 +219,23 @@ bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) {
    upb_strtable_uninit(t);
    *t = new_table;
  }
-  if ((k = strdup(k)) == NULL) return false;
+  if ((k = upb_strdup(k)) == NULL) return false;
  upb_table_insert(&t->t, upb_strkey(k), v, &upb_strhash, &upb_streql);
  return true;
 }

-upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key) {
+const upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key) {
  return upb_table_lookup(&t->t, upb_strkey(key), &upb_strhash, &upb_streql);
 }

+// Removes "key" from the string table.  Returns true if the key was present,
+// storing its value in *val when val is non-NULL.  The key string reported by
+// upb_table_remove() is freed here.
+bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val) {
+  upb_tabkey owned_key;
+  if (!upb_table_remove(&t->t, upb_strkey(key), val, &owned_key,
+                        &upb_strhash, &upb_streql)) {
+    return false;
+  }
+  free((void*)owned_key.str);
+  return true;
+}
+
 void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
  i->t = t;
  i->e = upb_table_begin(&t->t);
@ -224,8 +259,9 @@ size_t upb_inttable_count(const upb_inttable *t) {
  return t->t.count + t->array_count;
 }

-bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2) {
-  if (!upb_table_init(&t->t, hsize_lg2)) return false;
+bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t type,
+                            size_t asize, int hsize_lg2) {
+  if (!upb_table_init(&t->t, type, hsize_lg2)) return false;
  // Always make the array part at least 1 long, so that we know key 0
  // won't be in the hash part, which simplifies things.
  t->array_size = UPB_MAX(1, asize);
@ -236,17 +272,32 @@ bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2) {
    upb_table_uninit(&t->t);
    return false;
  }
-  memset(t->array, 0xff, array_bytes);
+  memset((void*)t->array, 0xff, array_bytes);
  return true;
 }

-bool upb_inttable_init(upb_inttable *t) {
-  return upb_inttable_sizedinit(t, 0, 4);
+bool upb_inttable_init(upb_inttable *t, upb_ctype_t type) {
+  return upb_inttable_sizedinit(t, type, 0, 4);
 }

 void upb_inttable_uninit(upb_inttable *t) {
  upb_table_uninit(&t->t);
-  free(t->array);
+  free((void*)t->array);
+}
+
+// Debug-only consistency check: verifies that every key produced by iterating
+// the table can be looked up again, and that the number of iterated entries
+// equals upb_inttable_count().  The body is compiled only when
+// UPB_DEBUG_TABLE is defined and NDEBUG is not.
+static void upb_inttable_check(upb_inttable *t) {
+  UPB_UNUSED(t);
+#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
+  // This check is very expensive (makes inserts/deletes O(N)).
+  upb_inttable_iter it;
+  size_t seen = 0;
+  for (upb_inttable_begin(&it, t); !upb_inttable_done(&it);
+       upb_inttable_next(&it)) {
+    const upb_value *hit = upb_inttable_lookup(t, upb_inttable_iter_key(&it));
+    assert(hit);
+    seen++;
+  }
+  assert(seen == upb_inttable_count(t));
+#endif
 }

 bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
@ -254,45 +305,78 @@ bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
  if (key < t->array_size) {
    assert(!upb_arrhas(t->array[key]));
    t->array_count++;
-    t->array[key] = val;
+    ((upb_value*)t->array)[key] = val;
  } else {
    if (upb_table_isfull(&t->t)) {
      // Need to resize the hash part, but we re-use the array part.
      upb_table new_table;
-      if (!upb_table_init(&new_table, t->t.size_lg2 + 1)) return false;
-      upb_tabent *e;
+      if (!upb_table_init(&new_table, t->t.type, t->t.size_lg2 + 1))
+        return false;
+      const upb_tabent *e;
      for (e = upb_table_begin(&t->t); e; e = upb_table_next(&t->t, e))
        upb_table_insert(&new_table, e->key, e->val, &upb_inthash, &upb_inteql);
+
+      assert(t->t.count == new_table.count);
+
      upb_table_uninit(&t->t);
      t->t = new_table;
    }
    upb_table_insert(&t->t, upb_intkey(key), val, &upb_inthash, &upb_inteql);
  }
+  upb_inttable_check(t);
  return true;
 }

-upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key) {
+const upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key) {
  if (key < t->array_size) {
-    upb_value *v = &t->array[key];
+    const upb_value *v = &t->array[key];
    return upb_arrhas(*v) ? v : NULL;
  }
  return upb_table_lookup(&t->t, upb_intkey(key), &upb_inthash, &upb_inteql);
 }

 bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
+  bool success;
  if (key < t->array_size) {
    if (upb_arrhas(t->array[key])) {
      t->array_count--;
      if (val) *val = t->array[key];
-      t->array[key] = upb_value_uint64(-1);
-      return true;
+      ((upb_value*)t->array)[key] = upb_value_uint64(-1);
+      success = true;
    } else {
-      return false;
+      success = false;
    }
  } else {
-    return upb_table_remove(
-        &t->t, upb_intkey(key), val, &upb_inthash, &upb_inteql);
+    upb_tabkey removed;
+    success = upb_table_remove(
+        &t->t, upb_intkey(key), val, &removed, &upb_inthash, &upb_inteql);
  }
+  upb_inttable_check(t);
+  return success;
+}
+
+// Inserts "val" using the table's current element count as its key, so that
+// an inttable used only through push/pop behaves like a stack.  Returns the
+// result of the underlying upb_inttable_insert().
+bool upb_inttable_push(upb_inttable *t, upb_value val) {
+  const uintptr_t next_key = upb_inttable_count(t);
+  return upb_inttable_insert(t, next_key, val);
+}
+
+// Removes and returns the value stored under key (count - 1), which is where
+// the most recent upb_inttable_push() placed its value.  The removal is
+// expected to succeed; this is checked with UPB_ASSERT_VAR.
+upb_value upb_inttable_pop(upb_inttable *t) {
+  const uintptr_t top = upb_inttable_count(t) - 1;
+  upb_value popped;
+  bool removed = upb_inttable_remove(t, top, &popped);
+  UPB_ASSERT_VAR(removed, removed);
+  return popped;
+}
+
+bool upb_inttable_insertptr(upb_inttable *t, const void *key, upb_value val) {
+  return upb_inttable_insert(t, (uintptr_t)key, val);
+}
+
+const upb_value *upb_inttable_lookupptr(const upb_inttable *t,
+                                        const void *key) {
+  return upb_inttable_lookup(t, (uintptr_t)key);
+}
+
+bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
+  return upb_inttable_remove(t, (uintptr_t)key, val);
 }

 void upb_inttable_compact(upb_inttable *t) {
@ -301,7 +385,10 @@ void upb_inttable_compact(upb_inttable *t) {
  upb_inttable_iter i;
  for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i))
    counts[upb_log2(upb_inttable_iter_key(&i))]++;
-  int count = upb_inttable_count(t);
+  // Int part must always be at least 1 entry large to catch lookups of key 0.
+  // Key 0 must always be in the array part because "0" in the hash part
+  // denotes an empty entry.
+  int count = UPB_MAX(upb_inttable_count(t), 1);
  int size;
  for (size = UPB_MAXARRSIZE; size > 1; size--) {
    count -= counts[size];
@ -311,7 +398,8 @@ void upb_inttable_compact(upb_inttable *t) {
  // Insert all elements into new, perfectly-sized table.
  upb_inttable new_table;
  int hashsize = (upb_inttable_count(t) - count + 1) / MAX_LOAD;
-  upb_inttable_sizedinit(&new_table, size, upb_log2(hashsize) + 1);
+
+  upb_inttable_sizedinit(&new_table, t->t.type, size, upb_log2(hashsize));
  for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i))
    upb_inttable_insert(
        &new_table, upb_inttable_iter_key(&i), upb_inttable_iter_value(&i));
@ -352,7 +440,7 @@ void upb_inttable_next(upb_inttable_iter *iter) {
 //   1. It will not work incrementally.
 //   2. It will not produce the same results on little-endian and big-endian
 //      machines.
-static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
+uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
  // 'm' and 'r' are mixing constants generated offline.
  // They're not really 'magic', they just happen to work well.
  const uint32_t m = 0x5bd1e995;
@ -403,7 +491,7 @@ static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {

 #define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }

-static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
+uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;
  const uint8_t * data = (const uint8_t *)key;
--- a/upb/table.h
+++ b/upb/table.h
@ -17,13 +17,16 @@
 *
 * This header is internal to upb; its interface should not be considered
 * public or stable.
+ *
+ * The table must be homogenous (all values of the same type).  We currently
+ * enforce this on insert but store the full upb_value (with type) anyway.
+ * This is required with the current interface because lookups vend a pointer
+ * to the table's internal storage.
 */

 #ifndef UPB_TABLE_H_
 #define UPB_TABLE_H_

-#include <stddef.h>
-#include <stdint.h>
 #include "upb.h"

 #ifdef __cplusplus
@ -32,45 +35,80 @@ extern "C" {

 typedef union {
  uintptr_t num;
-  char *str;  // We own, nullz.
+  const char *str;  // We own, nullz.
 } upb_tabkey;

+#define UPB_TABKEY_NUM(n) {n}
+#ifdef UPB_C99
+#define UPB_TABKEY_STR(s) {.str = s}
+#endif
+// TODO(haberman): C++
+#define UPB_TABKEY_NONE {0}
+
 typedef struct _upb_tabent {
  upb_tabkey key;
+  // Storing a upb_value here wastes a bit of memory in debug mode because
+  // we are storing the type for each value even though we enforce that all
+  // values are the same.  But since this only affects debug mode, we don't
+  // worry too much about it.  The same applies to upb_inttable.array below.
  upb_value val;
-  struct _upb_tabent *next;  // Internal chaining.
+  // Internal chaining.  This is const so we can create static initializers for
+  // tables.  We cast away const sometimes, but *only* when the containing
+  // upb_table is known to be non-const.  This requires a bit of care, but
+  // the subtlety is confined to table.c.
+  const struct _upb_tabent *next;
 } upb_tabent;

 typedef struct {
-  upb_tabent *entries;   // Hash table.
  size_t count;          // Number of entries in the hash part.
  size_t mask;           // Mask to turn hash value -> bucket.
+  upb_ctype_t type;      // Type of all values.
  uint8_t size_lg2;      // Size of the hash table part is 2^size_lg2 entries.
+  const upb_tabent *entries;   // Hash table.
 } upb_table;

 typedef struct {
  upb_table t;
 } upb_strtable;

+#define UPB_STRTABLE_INIT(count, mask, type, size_lg2, entries) \
+  {{count, mask, type, size_lg2, entries}}
+
 typedef struct {
-  upb_table t;           // For entries that don't fit in the array part.
-  upb_value *array;      // Array part of the table.
-  size_t array_size;     // Array part size.
-  size_t array_count;    // Array part number of elements.
+  upb_table t;             // For entries that don't fit in the array part.
+  const upb_value *array;  // Array part of the table.
+  size_t array_size;       // Array part size.
+  size_t array_count;      // Array part number of elements.
 } upb_inttable;

-INLINE upb_tabkey upb_intkey(uintptr_t key) { upb_tabkey k = {key}; return k; }
+#define UPB_INTTABLE_INIT(count, mask, type, size_lg2, ent, a, asize, acount) \
+  {{count, mask, type, size_lg2, ent}, a, asize, acount}

-INLINE upb_tabent *upb_inthash(const upb_table *t, upb_tabkey key) {
-  return t->entries + ((uint32_t)key.num & t->mask);
+#define UPB_EMPTY_INTTABLE_INIT(type) \
+  UPB_INTTABLE_INIT(0, 0, type, 0, NULL, NULL, 0, 0)
+
+#define UPB_ARRAY_EMPTYENT UPB_VALUE_INIT_INT64(-1)
+
+// Returns the number of slots in the hash part of the table.  A size_lg2 of 0
+// denotes a table with no hash part at all (zero slots), not a one-slot table.
+INLINE size_t upb_table_size(const upb_table *t) {
+  if (t->size_lg2 == 0) return 0;
+  // Shift a size_t rather than the int literal 1, so that a large size_lg2
+  // cannot overflow a signed int before the result is widened.
+  return (size_t)1 << t->size_lg2;
+}

+// Internal-only functions, in .h file only out of necessity.
+INLINE bool upb_tabent_isempty(const upb_tabent *e) { return e->key.num == 0; }
+INLINE upb_tabkey upb_intkey(uintptr_t key) { upb_tabkey k = {key}; return k; }
+INLINE const upb_tabent *upb_inthash(const upb_table *t, upb_tabkey key) {
+  return t->entries + ((uint32_t)key.num & t->mask);
+}
 INLINE bool upb_arrhas(upb_value v) { return v.val.uint64 != (uint64_t)-1; }
+uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed);

 // Initialize and uninitialize a table, respectively.  If memory allocation
 // failed, false is returned that the table is uninitialized.
-bool upb_inttable_init(upb_inttable *table);
-bool upb_strtable_init(upb_strtable *table);
+bool upb_inttable_init(upb_inttable *table, upb_ctype_t type);
+bool upb_strtable_init(upb_strtable *table, upb_ctype_t type);
 void upb_inttable_uninit(upb_inttable *table);
 void upb_strtable_uninit(upb_strtable *table);

@ -90,14 +128,24 @@ bool upb_strtable_insert(upb_strtable *t, const char *key, upb_value val);

 // Looks up key in this table, returning a pointer to the table's internal copy
 // of the user's inserted data, or NULL if this key is not in the table.  The
-// user is free to modify the given upb_value, which will be reflected in any
-// future lookups of this key.  The returned pointer is invalidated by inserts.
-upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key);
-upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key);
+// returned pointer is invalidated by inserts.
+const upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key);
+const upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key);

 // Removes an item from the table.  Returns true if the remove was successful,
 // and stores the removed item in *val if non-NULL.
 bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val);
+bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val);
+
+// Handy routines for treating an inttable like a stack.  May not be mixed with
+// other insert/remove calls.
+bool upb_inttable_push(upb_inttable *t, upb_value val);
+upb_value upb_inttable_pop(upb_inttable *t);
+
+// Convenience routines for inttables with pointer keys.
+bool upb_inttable_insertptr(upb_inttable *t, const void *key, upb_value val);
+bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val);
+const upb_value *upb_inttable_lookupptr(const upb_inttable *t, const void *key);

 // Optimizes the table for the current set of entries, for both memory use and
 // lookup time.  Client should call this after all entries have been inserted;
@ -105,12 +153,15 @@ bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val);
 void upb_inttable_compact(upb_inttable *t);

 // A special-case inlinable version of the lookup routine for 32-bit integers.
-INLINE upb_value *upb_inttable_lookup32(const upb_inttable *t, uint32_t key) {
+INLINE const upb_value *upb_inttable_lookup32(const upb_inttable *t,
+                                              uint32_t key) {
  if (key < t->array_size) {
-    upb_value *v = &t->array[key];
+    const upb_value *v = &t->array[key];
    return upb_arrhas(*v) ? v : NULL;
  }
-  for (upb_tabent *e = upb_inthash(&t->t, upb_intkey(key)); true; e = e->next) {
+  const upb_tabent *e;
+  if (t->t.entries == NULL) return NULL;
+  for (e = upb_inthash(&t->t, upb_intkey(key)); true; e = e->next) {
    if ((uint32_t)e->key.num == key) return &e->val;
    if (e->next == NULL) return NULL;
  }
@ -124,12 +175,12 @@ INLINE upb_value *upb_inttable_lookup32(const upb_inttable *t, uint32_t key) {
 //   upb_strtable_begin(&i, t);
 //   for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
 //     const char *key = upb_strtable_iter_key(&i);
-//     const myval *val = upb_strtable_iter_value(&i);
+//     const upb_value val = upb_strtable_iter_value(&i);
 //     // ...
 //   }
 typedef struct {
  const upb_strtable *t;
-  upb_tabent *e;
+  const upb_tabent *e;
 } upb_strtable_iter;

 void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t);
@ -149,13 +200,15 @@ INLINE upb_value upb_strtable_iter_value(upb_strtable_iter *i) {
 //   upb_inttable_iter i;
 //   upb_inttable_begin(&i, t);
 //   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+//     uintptr_t key = upb_inttable_iter_key(&i);
+//     upb_value val = upb_inttable_iter_value(&i);
 //     // ...
 //   }
 typedef struct {
  const upb_inttable *t;
  union {
-    upb_tabent *ent;  // For hash iteration.
-    upb_value *val;   // For array iteration.
+    const upb_tabent *ent;  // For hash iteration.
+    const upb_value *val;   // For array iteration.
  } ptr;
  uintptr_t arrkey;
  bool array_part;
--- a/upb/upb.c
+++ b/upb/upb.c
@ -29,24 +29,31 @@ void upb_status_uninit(upb_status *status) {
  free(status->buf);
 }

-void upb_status_seterrf(upb_status *s, const char *msg, ...) {
-  s->code = UPB_ERROR;
+// Returns true if no error has been recorded on "status".
+bool upb_ok(const upb_status *status) { return !status->error; }
+// Returns true if the EOF flag is set on "status".
+bool upb_eof(const upb_status *status) { return status->eof_; }
+
+// Marks "status" as failed and stores a printf-style message built from "msg"
+// and the variadic arguments in the status-owned buffer.  The error space is
+// reset to NULL.  A NULL status is ignored.
+void upb_status_seterrf(upb_status *status, const char *msg, ...) {
+  if (!status) return;
+  status->error = true;
+  status->space = NULL;
  va_list args;
  va_start(args, msg);
-  upb_vrprintf(&s->buf, &s->bufsize, 0, msg, args);
+  // Formats at offset 0 of status->buf.
+  // NOTE(review): the return value of upb_vrprintf is not checked here --
+  // confirm that ignoring a formatting/allocation failure is acceptable.
+  upb_vrprintf(&status->buf, &status->bufsize, 0, msg, args);
  va_end(args);
-  s->str = s->buf;
+  status->str = status->buf;
 }

+// Marks "status" as failed with the message "msg" and resets the error space
+// to NULL.  Only the pointer is stored (no copy is made in this function), so
+// the caller has to keep "msg" valid for as long as the status refers to it.
+// A NULL status is ignored.
 void upb_status_seterrliteral(upb_status *status, const char *msg) {
+  if (!status) return;
  status->error = true;
  status->str = msg;
  status->space = NULL;
 }

 void upb_status_copy(upb_status *to, const upb_status *from) {
+  if (!to) return;
  to->error = from->error;
-  to->eof = from->eof;
+  to->eof_ = from->eof_;
  to->code = from->code;
  to->space = from->space;
  if (from->str == from->buf) {
@ -78,19 +85,26 @@ const char *upb_status_getstr(const upb_status *_status) {
 }

+// Resets "status" to the no-error, non-EOF state with no code, error space or
+// message.  The status-owned buffer is not touched by this function.
+// A NULL status is ignored.
 void upb_status_clear(upb_status *status) {
+  if (!status) return;
  status->error = false;
-  status->eof = false;
+  status->eof_ = false;
  status->code = 0;
  status->space = NULL;
  status->str = NULL;
 }

+// Stores an error-space-specific "code" (and its "space") on "status" and
+// drops any previously set message.  A NULL status is ignored.
+// NOTE(review): the "error" flag is not modified here -- confirm that callers
+// are expected to set it separately.
 void upb_status_setcode(upb_status *status, upb_errorspace *space, int code) {
+  if (!status) return;
  status->code = code;
  status->space = space;
  status->str = NULL;
 }

+// Sets the EOF flag on "status"; no other field is changed.  A NULL status is
+// ignored.
+void upb_status_seteof(upb_status *status) {
+  if (!status) return;
+  status->eof_ = true;
+}
+
 int upb_vrprintf(char **buf, size_t *size, size_t ofs,
                 const char *fmt, va_list args) {
  // Try once without reallocating.  We have to va_copy because we might have
--- a/upb/upb.h
+++ b/upb/upb.h
@ -5,6 +5,9 @@
 * Author: Josh Haberman <jhaberman@gmail.com>
 *
 * This file contains shared definitions that are widely used across upb.
+ *
+ * This is a mixed C/C++ interface that offers a full API to both languages.
+ * See the top-level README for more information.
 */

 #ifndef UPB_H_
@ -25,6 +28,28 @@ extern "C" {
 #define INLINE static inline
 #endif

+// UPB_C99 is defined when compiling as C99 or later.  __STDC_VERSION__ is
+// checked with defined() first because C89 compilers, and C++ compilers in
+// general, need not define it; testing an undefined macro in #if evaluates to
+// 0 but triggers -Wundef.
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#define UPB_C99
+#endif
+
+// UPB_CXX11 is defined when compiling as C++11 or later, or with GCC's
+// experimental C++0x mode enabled.
+#if (defined(__cplusplus) && __cplusplus >= 201103L) || defined(__GXX_EXPERIMENTAL_CXX0X__)
+#define UPB_CXX11
+#endif
+
+// UPB_DISALLOW_POD_OPS(class_name) declares the default constructor,
+// destructor, copy constructor and copy assignment operator of class_name so
+// that client code cannot use them.  With C++11 support (UPB_CXX11, unless the
+// user opts out via UPB_NO_CXX11) the members are explicitly deleted;
+// otherwise they are only declared, never defined.  The macro is meant to be
+// placed inside the class body.
+//
+// The test uses UPB_CXX11 (defined above) rather than re-checking
+// __GXX_EXPERIMENTAL_CXX0X__, so that compilers which only advertise C++11 via
+// __cplusplus also get the "= delete" form.
+#if defined(UPB_CXX11) && !defined(UPB_NO_CXX11)
+#define UPB_DISALLOW_POD_OPS(class_name) \
+  class_name() = delete; \
+  ~class_name() = delete; \
+  class_name(const class_name&) = delete; \
+  void operator=(const class_name&) = delete;
+#else
+#define UPB_DISALLOW_POD_OPS(class_name) \
+  class_name(); \
+  ~class_name(); \
+  class_name(const class_name&); \
+  void operator=(const class_name&);
+#endif
+
 #ifdef __GNUC__
 #define UPB_NORETURN __attribute__((__noreturn__))
 #else
@ -32,12 +57,33 @@ extern "C" {
 #endif

+// Fallback definitions, used only when the limit macros are not already
+// defined by the time this header is processed.
 #ifndef UINT16_MAX
-#define UINT16_MAX 65535
+#define UINT16_MAX 0xffff
+#endif
+
+#ifndef UINT32_MAX
+#define UINT32_MAX 0xffffffff
 #endif

+// Larger / smaller of two values.  These are plain macros: each argument may
+// be evaluated more than once, so do not pass expressions with side effects.
 #define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
 #define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))

+// For our C-based inheritance, sometimes it's necessary to upcast an object to
+// its base class.  We try to minimize the need for this by replicating base
+// class functions in the derived class -- the derived class functions simply
+// forward to the base class implementations.  This strategy simplifies the C++
+// API since we can't use real C++ inheritance.
+#define upb_upcast(obj) (&(obj)->base)
+#define upb_upcast2(obj) upb_upcast(upb_upcast(obj))
+
+char *upb_strdup(const char *s);
+
+// Marks "var" as intentionally unused.  The argument is parenthesized so that
+// arbitrary expressions can be passed safely.
+#define UPB_UNUSED(var) (void)(var)
+
+// For asserting something about a variable when the variable is not used for
+// anything else.  This prevents "unused variable" warnings when compiling
+// with NDEBUG, where assert() expands to nothing.  The expansion is wrapped in
+// do/while(0) so that it behaves as a single statement, e.g. as the body of an
+// unbraced "if".
+#define UPB_ASSERT_VAR(var, predicate) \
+  do { UPB_UNUSED(var); assert(predicate); } while (0)
+
 // The maximum that any submessages can be nested.  Matches proto2's limit.
 // At the moment this specifies the size of several statically-sized arrays
 // and therefore setting it high will cause more memory to be used.  Will
@ -45,19 +91,7 @@ extern "C" {
 // TODO: make this a runtime-settable property of upb_handlers.
 #define UPB_MAX_NESTING 64

-// The maximum number of fields that any one .proto type can have.  Note that
-// this is very different than the max field number.  It is hard to imagine a
-// scenario where more than 2k fields (each with its own name and field number)
-// makes sense.  The .proto file to describe it would be 2000 lines long and
-// contain 2000 unique names.
-//
-// With this limit we can store a has-bit offset in 8 bits (2**8 * 8 = 2048)
-// and we can store a value offset in 16 bits, since the maximum message
-// size is 16,640 bytes (2**8 has-bits + 2048 * 8-byte value).  Note that
-// strings and arrays are not counted in this, only the *pointer* to them is.
-// An individual string or array is unaffected by this 16k byte limit.
-#define UPB_MAX_FIELDS (2048)
-
+// Inherent limit of protobuf wire format and schema definition.
 #define UPB_MAX_FIELDNUMBER ((1 << 29) - 1)

 // Nested type names are separated by periods.
@ -81,7 +115,99 @@ extern "C" {
 #define UPB_MAX_TYPE_DEPTH 64


-/* upb_value ******************************************************************/
+/* upb::Status ****************************************************************/
+
+#ifdef __cplusplus
+namespace upb { class Status; }
+typedef upb::Status upb_status;
+#else
+struct upb_status;
+typedef struct upb_status upb_status;
+#endif
+
+typedef enum {
+  UPB_OK,          // The operation completed successfully.
+  UPB_SUSPENDED,   // The operation was suspended and may be resumed later.
+  UPB_ERROR,       // An error occurred.
+} upb_success_t;
+
+typedef struct {
+  const char *name;
+  // Writes a NULL-terminated string to "buf" containing an error message for
+  // the given error code, returning false if the message was too large to fit.
+  bool (*code_to_string)(int code, char *buf, size_t len);
+} upb_errorspace;
+
+// Status object shared by C and C++.  Both languages see the same data
+// members: C++ gets them as private members of class upb::Status plus a small
+// set of member functions, C gets them as a plain struct upb_status.
+#ifdef __cplusplus
+
+class upb::Status {
+ public:
+  typedef upb_success_t Success;
+
+  Status();
+  ~Status();
+
+  // NOTE(review): ok() and eof() are declared non-const although the C
+  // functions upb_ok()/upb_eof() take a const upb_status* -- consider making
+  // them const together with their out-of-line definitions.
+  bool ok();
+  bool eof();
+
+  const char *GetString() const;
+  void SetEof();
+  void SetErrorLiteral(const char* msg);
+  void Clear();
+
+ private:
+#else
+struct upb_status {
+#endif
+  // Set to true by the upb_status_seterr* functions; read by upb_ok().
+  bool error;
+  // Set by upb_status_seteof(); read by upb_eof().
+  bool eof_;
+
+  // Specific status code defined by some error space (optional).
+  int code;
+  upb_errorspace *space;
+
+  // Error message (optional).
+  const char *str;  // NULL when no message is present.  NULL-terminated.
+  char *buf;        // Owned by the status.
+  size_t bufsize;
+};
+
+// Static initializer for a upb_status, in member order:
+// error, eof_, code, space, str, buf, bufsize.  The first member is the bool
+// "error" flag, so it is initialized with false rather than with the
+// unrelated upb_success_t enumerator UPB_OK (which merely happens to be 0).
+#define UPB_STATUS_INIT {false, false, 0, NULL, NULL, NULL, 0}
+
+void upb_status_init(upb_status *status);
+void upb_status_uninit(upb_status *status);
+
+bool upb_ok(const upb_status *status);
+bool upb_eof(const upb_status *status);
+
+// Any of the functions that write to a status object allow status to be NULL,
+// to support use cases where the function's caller does not care about the
+// status message.
+void upb_status_clear(upb_status *status);
+void upb_status_seterrliteral(upb_status *status, const char *msg);
+void upb_status_seterrf(upb_status *status, const char *msg, ...);
+void upb_status_setcode(upb_status *status, upb_errorspace *space, int code);
+void upb_status_seteof(upb_status *status);
+// The returned string is invalidated by any other call into the status.
+const char *upb_status_getstr(const upb_status *status);
+void upb_status_copy(upb_status *to, const upb_status *from);
+
+// Like vasprintf (which allocates a string large enough for the result), but
+// uses *buf (which can be NULL) as a starting point and reallocates it only if
+// the new value will not fit.  "size" is updated to reflect the allocated size
+// of the buffer.  Starts writing at the given offset into the string; bytes
+// preceding this offset are unaffected.  Returns the new length of the string,
+// or -1 on memory allocation failure.
+int upb_vrprintf(char **buf, size_t *size, size_t ofs,
+                 const char *fmt, va_list args);
+
+
+/* upb::Value *****************************************************************/
+
+// TODO(haberman): upb::Value is gross and should be retired from the public
+// interface (we *may* still want to keep it for internal use).  upb::Handlers
+// and upb::Def should replace their use of Value with one function for each C
+// type.

 // Clients should not need to access these enum values; they are used internally
 // to do typechecks of upb_value accesses.
@ -93,13 +219,19 @@ typedef enum {
  UPB_CTYPE_DOUBLE = 5,
  UPB_CTYPE_FLOAT = 6,
  UPB_CTYPE_BOOL = 7,
-  UPB_CTYPE_PTR = 8,
-  UPB_CTYPE_BYTEREGION = 9,
-  UPB_CTYPE_FIELDDEF = 10,
+  UPB_CTYPE_CSTR = 8,
+  UPB_CTYPE_PTR = 9,
+  UPB_CTYPE_BYTEREGION = 10,
+  UPB_CTYPE_FIELDDEF = 11,
 } upb_ctype_t;

-struct _upb_byteregion;
-struct _upb_fielddef;
+#ifdef __cplusplus
+namespace upb { class ByteRegion; }
+typedef upb::ByteRegion upb_byteregion;
+#else
+struct upb_byteregion;
+typedef struct upb_byteregion upb_byteregion;
+#endif

 // A single .proto value.  The owner must have an out-of-band way of knowing
 // the type, so that it knows which union member to use.
@ -112,9 +244,10 @@ typedef struct {
    double _double;
    float _float;
    bool _bool;
-    void *_void;
-    struct _upb_byteregion *byteregion;
-    const struct _upb_fielddef *fielddef;
+    char *cstr;
+    void *ptr;
+    const void *constptr;
+    upb_byteregion *byteregion;
  } val;

 #ifndef NDEBUG
@ -124,12 +257,32 @@ typedef struct {
 #endif
 } upb_value;

+#ifdef UPB_C99
+#define UPB_VAL_INIT(v, member) {.member = v}
+#endif
+// TODO(haberman): C++
+
+// SET_TYPE assigns a type tag and UPB_VALUE_INIT builds a static initializer
+// for a upb_value.  With NDEBUG defined, SET_TYPE expands to nothing and the
+// initializer contains only the value; otherwise the initializer also carries
+// the "type" tag.  UPB_VALUE_INIT relies on UPB_VAL_INIT.
 #ifdef NDEBUG
 #define SET_TYPE(dest, val)
+#define UPB_VALUE_INIT(v, member, type) {UPB_VAL_INIT(v, member)}
 #else
 #define SET_TYPE(dest, val) dest = val
+#define UPB_VALUE_INIT(v, member, type) {UPB_VAL_INIT(v, member), type}
 #endif

+#define UPB_VALUE_INIT_INT32(v)  UPB_VALUE_INIT(v, int32,   UPB_CTYPE_INT32)
+#define UPB_VALUE_INIT_INT64(v)  UPB_VALUE_INIT(v, int64,   UPB_CTYPE_INT64)
+#define UPB_VALUE_INIT_UINT32(v) UPB_VALUE_INIT(v, uint32,  UPB_CTYPE_UINT32)
+#define UPB_VALUE_INIT_UINT64(v) UPB_VALUE_INIT(v, uint64,  UPB_CTYPE_UINT64)
+#define UPB_VALUE_INIT_DOUBLE(v) UPB_VALUE_INIT(v, _double, UPB_CTYPE_DOUBLE)
+#define UPB_VALUE_INIT_FLOAT(v)  UPB_VALUE_INIT(v, _float,  UPB_CTYPE_FLOAT)
+#define UPB_VALUE_INIT_BOOL(v)   UPB_VALUE_INIT(v, _bool,   UPB_CTYPE_BOOL)
+#define UPB_VALUE_INIT_CSTR(v)   UPB_VALUE_INIT(v, cstr,    UPB_CTYPE_CSTR)
+#define UPB_VALUE_INIT_PTR(v)    UPB_VALUE_INIT(v, ptr,     UPB_CTYPE_PTR)
+#define UPB_VALUE_INIT_CONSTPTR(v) UPB_VALUE_INIT(v, constptr, UPB_CTYPE_PTR)
+// Non-existent type, all reads will fail.
+#define UPB_VALUE_INIT_NONE      UPB_VALUE_INIT(NULL, ptr, -1)
+
 // For each value type, define the following set of functions:
 //
 // // Get/set an int32 from a upb_value.
@ -174,12 +327,9 @@ ALL(int64,  int64,   int64_t,  UPB_CTYPE_INT64);
 ALL(uint32, uint32,  uint32_t, UPB_CTYPE_UINT32);
 ALL(uint64, uint64,  uint64_t, UPB_CTYPE_UINT64);
 ALL(bool,   _bool,   bool,     UPB_CTYPE_BOOL);
-ALL(ptr,    _void,   void*,    UPB_CTYPE_PTR);
-ALL(byteregion, byteregion, struct _upb_byteregion*, UPB_CTYPE_BYTEREGION);
-
-// upb_fielddef should never be modified from a callback
-// (ie. when they're getting passed through a upb_value).
-ALL(fielddef, fielddef, const struct _upb_fielddef*, UPB_CTYPE_FIELDDEF);
+ALL(cstr,   cstr,    char*,    UPB_CTYPE_CSTR);
+ALL(ptr,    ptr,     void*,    UPB_CTYPE_PTR);
+ALL(byteregion, byteregion, upb_byteregion*, UPB_CTYPE_BYTEREGION);

 #ifdef __KERNEL__
 // Linux kernel modules are compiled without SSE and therefore are incapable
@ -199,64 +349,55 @@ ALL(float,  _float,  float,    UPB_CTYPE_FLOAT);

 extern upb_value UPB_NO_VALUE;

+#ifdef __cplusplus
+}  // extern "C"

-/* upb_status *****************************************************************/
-
-typedef enum {
-  UPB_OK,          // The operation completed successfully.
-  UPB_SUSPENDED,   // The operation was suspended and may be resumed later.
-  UPB_ERROR,       // An error occurred.
-} upb_success_t;
-
-typedef struct {
-  const char *name;
-  // Writes a NULL-terminated string to "buf" containing an error message for
-  // the given error code, returning false if the message was too large to fit.
-  bool (*code_to_string)(int code, char *buf, size_t len);
-} upb_errorspace;
-
-typedef struct {
-  bool error;
-  bool eof;
-
-  // Specific status code defined by some error space (optional).
-  int code;
-  upb_errorspace *space;
-
-  // Error message (optional).
-  const char *str;  // NULL when no message is present.  NULL-terminated.
-  char *buf;        // Owned by the status.
-  size_t bufsize;
-} upb_status;
-
-#define UPB_STATUS_INIT {UPB_OK, false, 0, NULL, NULL, NULL, 0}
+namespace upb {

-void upb_status_init(upb_status *status);
-void upb_status_uninit(upb_status *status);
+typedef upb_value Value;

-INLINE bool upb_ok(const upb_status *status) { return !status->error; }
-INLINE bool upb_eof(const upb_status *status) { return status->eof; }
+template <typename T> T GetValue(Value v);
+template <typename T> Value MakeValue(T v);

-void upb_status_clear(upb_status *status);
-void upb_status_seterrliteral(upb_status *status, const char *msg);
-void upb_status_seterrf(upb_status *s, const char *msg, ...);
-void upb_status_setcode(upb_status *s, upb_errorspace *space, int code);
-INLINE void upb_status_seteof(upb_status *s) { s->eof = true; }
-// The returned string is invalidated by any other call into the status.
-const char *upb_status_getstr(const upb_status *s);
-void upb_status_copy(upb_status *to, const upb_status *from);
+// Defines the GetValue<ctype>() and MakeValue<ctype>() specializations for one
+// C type by forwarding to the C accessors upb_value_get<type>() and
+// upb_value_<type>().  "type" is the suffix used in the C function names,
+// "ctype" is the C/C++ type.
+#define UPB_VALUE_ACCESSORS(type, ctype) \
+  template <> inline ctype GetValue<ctype>(Value v) { \
+    return upb_value_get ## type(v); \
+  } \
+  template <> inline Value MakeValue<ctype>(ctype v) { \
+    return upb_value_ ## type(v); \
+  }

-// Like vasprintf (which allocates a string large enough for the result), but
-// uses *buf (which can be NULL) as a starting point and reallocates it only if
-// the new value will not fit.  "size" is updated to reflect the allocated size
-// of the buffer.  Starts writing at the given offset into the string; bytes
-// preceding this offset are unaffected.  Returns the new length of the string,
-// or -1 on memory allocation failure.
-int upb_vrprintf(char **buf, size_t *size, size_t ofs,
-                 const char *fmt, va_list args);
+// One GetValue/MakeValue specialization pair per supported C type.  There is
+// deliberately no trailing semicolon: each expansion ends in a function
+// definition, and a stray ';' at namespace scope is an empty declaration that
+// pre-C++11 compilers reject or warn about in pedantic mode.
+UPB_VALUE_ACCESSORS(double, double)
+UPB_VALUE_ACCESSORS(float,  float)
+UPB_VALUE_ACCESSORS(int32,  int32_t)
+UPB_VALUE_ACCESSORS(int64,  int64_t)
+UPB_VALUE_ACCESSORS(uint32, uint32_t)
+UPB_VALUE_ACCESSORS(uint64, uint64_t)
+UPB_VALUE_ACCESSORS(bool,   bool)
+
+#undef UPB_VALUE_ACCESSORS
+
+// Typed helpers for pointer values: GetPtrValue<T>() reads the void* stored in
+// "v" via upb_value_getptr() and casts it to T*; MakePtrValue() stores a T* as
+// a void* via upb_value_ptr().  No check is made here that the stored pointer
+// really refers to a T.
+template <typename T> inline T* GetPtrValue(Value v) {
+  return static_cast<T*>(upb_value_getptr(v));
+}
+template <typename T> inline Value MakePtrValue(T* v) {
+  return upb_value_ptr(static_cast<void*>(v));
+}
+
+// C++ Wrappers
+//
+// Each upb::Status member function simply forwards to the corresponding
+// upb_status_* / upb_ok / upb_eof C function, passing "this" as the status.
+inline Status::Status() { upb_status_init(this); }
+inline Status::~Status() { upb_status_uninit(this); }
+inline bool Status::ok() { return upb_ok(this); }
+inline bool Status::eof() { return upb_eof(this); }
+inline const char *Status::GetString() const { return upb_status_getstr(this); }
+inline void Status::SetEof() { upb_status_seteof(this); }
+inline void Status::SetErrorLiteral(const char* msg) {
+  upb_status_seterrliteral(this, msg);
+}
+inline void Status::Clear() { upb_status_clear(this); }
+
+}  // namespace upb

-#ifdef __cplusplus
-}  /* extern "C" */
 #endif

 #endif  /* UPB_H_ */