diff --git a/Makefile b/Makefile
index bebe023b72..5320876340 100644
--- a/Makefile
+++ b/Makefile
@@ -162,13 +162,9 @@ upb/pb/jit_debug_elf_file.o: upb/pb/jit_debug_elf_file.s
 	$(E) GAS $<
 	$(Q) gcc -c upb/pb/jit_debug_elf_file.s -o upb/pb/jit_debug_elf_file.o
 
-upb/pb/jit_debug_elf_file2.o: upb/pb/jit_debug_elf_file.o
-	$(E) OBJCOPY $<
-	$(Q) objcopy --change-section-address .text=0x12345678 $< $@
-
-upb/pb/jit_debug_elf_file.h: upb/pb/jit_debug_elf_file2.o
+upb/pb/jit_debug_elf_file.h: upb/pb/jit_debug_elf_file.o
 	$(E) XXD $<
-	$(Q) xxd -i < upb/pb/jit_debug_elf_file2.o > upb/pb/jit_debug_elf_file.h
+	$(Q) xxd -i < upb/pb/jit_debug_elf_file.o > upb/pb/jit_debug_elf_file.h
 upb/pb/decoder_x64.h: upb/pb/jit_debug_elf_file.h
 endif
 
@@ -232,15 +228,13 @@ VALGRIND=valgrind --leak-check=full --error-exitcode=1
 test: tests
 	@echo Running all tests under valgrind.
 	@set -e  # Abort on error.
-	@for test in $(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS); do \
+	@for test in $(TESTS); do \
 	  if [ -x ./$$test ] ; then \
 	    echo !!! $(VALGRIND) ./$$test; \
-	    $(VALGRIND) ./$$test tests/test.proto.pb || exit 1; \
+	    $(VALGRIND) ./$$test || exit 1; \
 	  fi \
 	done; \
-	$(VALGRIND) ./tests/t.test_vs_proto2.googlemessage1 benchmarks/google_messages.proto.pb benchmarks/google_message1.dat
-	$(VALGRIND) ./tests/t.test_vs_proto2.googlemessage2 benchmarks/google_messages.proto.pb benchmarks/google_message2.dat
-	@echo "All tests passed!"
+	echo "All tests passed!"
 
 tests/t.test_vs_proto2.googlemessage1 \
 tests/t.test_vs_proto2.googlemessage2: \
diff --git a/benchmarks/parsestream.upb.c b/benchmarks/parsestream.upb.c
index 19d8ccf5fd..4d13e9d137 100644
--- a/benchmarks/parsestream.upb.c
+++ b/benchmarks/parsestream.upb.c
@@ -76,7 +76,8 @@ static size_t run(int i)
   (void)i;
   upb_status status = UPB_STATUS_INIT;
   upb_stringsrc_reset(&stringsrc, input_str, input_len);
-  upb_decoder_reset(&decoder, upb_stringsrc_allbytes(&stringsrc), NULL);
+  upb_decoder_reset(&decoder, upb_stringsrc_bytesrc(&stringsrc),
+                    0, UPB_NONDELIMITED, NULL);
   upb_decoder_decode(&decoder, &status);
   if(!upb_ok(&status)) goto err;
   return input_len;
diff --git a/benchmarks/parsetoproto2.upb.cc b/benchmarks/parsetoproto2.upb.cc
index 03a1039eec..75cd10c2fa 100644
--- a/benchmarks/parsetoproto2.upb.cc
+++ b/benchmarks/parsetoproto2.upb.cc
@@ -24,7 +24,6 @@
 #include <google/protobuf/descriptor.h>
 #undef private
 
-char *str;
 static size_t len;
 MESSAGE_CIDENT msg[NUM_MESSAGES];
 MESSAGE_CIDENT msg2;
@@ -54,13 +53,9 @@ upb_flow_t proto2_setstr(void *m, upb_value fval, upb_value val) {
   const upb_fielddef *f = upb_value_getfielddef(fval);
   std::string **str = (std::string**)UPB_INDEX(m, f->offset, 1);
   if (*str == f->default_ptr) *str = new std::string;
-  const upb_byteregion *ref = upb_value_getbyteregion(val);
-  uint32_t len;
-  (*str)->assign(
-      upb_byteregion_getptr(ref, upb_byteregion_startofs(ref), &len),
-      upb_byteregion_len(ref));
-  assert(len == upb_byteregion_len(ref));
+  const upb_strref *ref = upb_value_getstrref(val);
   // XXX: only supports contiguous strings atm.
+  (*str)->assign(ref->ptr, ref->len);
   return UPB_CONTINUE;
 }
 
@@ -69,13 +64,9 @@ upb_flow_t proto2_append_str(void *_r, upb_value fval, upb_value val) {
   typedef google::protobuf::RepeatedPtrField<std::string> R;
   (void)fval;
   R *r = (R*)_r;
-  const upb_byteregion *ref = upb_value_getbyteregion(val);
+  const upb_strref *ref = upb_value_getstrref(val);
   // XXX: only supports contiguous strings atm.
-  uint32_t len;
-  r->Add()->assign(
-      upb_byteregion_getptr(ref, upb_byteregion_startofs(ref), &len),
-      upb_byteregion_len(ref));
-  assert(len == upb_byteregion_len(ref));
+  r->Add()->assign(ref->ptr, ref->len);
   return UPB_CONTINUE;
 }
 
@@ -274,7 +265,7 @@ static bool initialize()
   upb_symtab_unref(s);
 
   // Read the message data itself.
-  str = upb_readfile(MESSAGE_FILE, &len);
+  char *str = upb_readfile(MESSAGE_FILE, &len);
   if(str == NULL) {
     fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
     return false;
@@ -284,6 +275,7 @@ static bool initialize()
   msg2.ParseFromArray(str, len);
 
   upb_stringsrc_init(&strsrc);
+  upb_stringsrc_reset(&strsrc, str, len);
   upb_handlers *h = upb_handlers_new();
   upb_accessors_reghandlers(h, def);
   if (!JIT) h->should_jit = false;
@@ -304,8 +296,8 @@ static size_t run(int i)
   (void)i;
   upb_status status = UPB_STATUS_INIT;
   msg[i % NUM_MESSAGES].Clear();
-  upb_stringsrc_reset(&strsrc, str, len);
-  upb_decoder_reset(&d, upb_stringsrc_allbytes(&strsrc), &msg[i % NUM_MESSAGES]);
+  upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc),
+                    0, UPB_NONDELIMITED, &msg[i % NUM_MESSAGES]);
   upb_decoder_decode(&d, &status);
   if(!upb_ok(&status)) goto err;
   return len;
diff --git a/benchmarks/parsetostruct.upb.c b/benchmarks/parsetostruct.upb.c
index 4eeafbb580..5e7aa3573b 100644
--- a/benchmarks/parsetostruct.upb.c
+++ b/benchmarks/parsetostruct.upb.c
@@ -8,7 +8,6 @@
 #include "upb/pb/glue.h"
 
 static const upb_msgdef *def;
-char *str;
 static size_t len;
 static void *msg[NUM_MESSAGES];
 static upb_stringsrc strsrc;
@@ -34,7 +33,7 @@ static bool initialize()
   upb_symtab_unref(s);
 
   // Read the message data itself.
-  str = upb_readfile(MESSAGE_FILE, &len);
+  char *str = upb_readfile(MESSAGE_FILE, &len);
   if(str == NULL) {
     fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
     return false;
@@ -44,6 +43,7 @@ static bool initialize()
     msg[i] = upb_stdmsg_new(def);
 
   upb_stringsrc_init(&strsrc);
+  upb_stringsrc_reset(&strsrc, str, len);
   upb_handlers *h = upb_handlers_new();
   upb_accessors_reghandlers(h, def);
   if (!JIT) h->should_jit = false;
@@ -70,8 +70,8 @@ static size_t run(int i)
   upb_status status = UPB_STATUS_INIT;
   i %= NUM_MESSAGES;
   upb_msg_clear(msg[i], def);
-  upb_stringsrc_reset(&strsrc, str, len);
-  upb_decoder_reset(&d, upb_stringsrc_allbytes(&strsrc), msg[i]);
+  upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc),
+                    0, UPB_NONDELIMITED, msg[i]);
   upb_decoder_decode(&d, &status);
   if(!upb_ok(&status)) goto err;
   return len;
diff --git a/bindings/cpp/upb/bytestream.cc b/bindings/cpp/upb/bytestream.cc
new file mode 100644
index 0000000000..df0797e736
--- /dev/null
+++ b/bindings/cpp/upb/bytestream.cc
@@ -0,0 +1,53 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+
+#include "bytestream.hpp"
+
+namespace upb {
+
+// Returns the C vtable whose entries are the static V* thunks defined below.
+// A single function-local static table is shared by every instance.
+upb_bytesrc_vtbl* ByteSourceBase::vtable() {
+  static upb_bytesrc_vtbl vtbl = {
+    &ByteSourceBase::VFetch,
+    &ByteSourceBase::VDiscard,
+    &ByteSourceBase::VCopy,
+    &ByteSourceBase::VGetPtr,
+  };
+  return &vtbl;
+}
+
+// Each thunk below receives the source as an opaque pointer.  ByteSourceBase
+// has virtual methods while its upb_bytesrc base is a plain struct, so the
+// base subobject is not guaranteed to sit at offset zero.  The pointer is
+// therefore converted to upb_bytesrc* first and only then downcast, which
+// lets the compiler apply the base-to-derived adjustment.
+// NOTE(review): assumes the C core hands back the upb_bytesrc* registered by
+// upb_bytesrc_init() in the ByteSourceBase constructor -- confirm.
+
+upb_bytesuccess_t ByteSourceBase::VFetch(void *src, uint64_t ofs, size_t *len) {
+  upb_bytesrc *base = static_cast<upb_bytesrc*>(src);
+  return static_cast<ByteSourceBase*>(base)->Fetch(ofs, len);
+}
+
+void ByteSourceBase::VCopy(
+    const void *src, uint64_t ofs, size_t len, char* dest) {
+  const upb_bytesrc *base = static_cast<const upb_bytesrc*>(src);
+  static_cast<const ByteSourceBase*>(base)->Copy(ofs, len, dest);
+}
+
+void ByteSourceBase::VDiscard(void *src, uint64_t ofs) {
+  upb_bytesrc *base = static_cast<upb_bytesrc*>(src);
+  static_cast<ByteSourceBase*>(base)->Discard(ofs);
+}
+
+const char * ByteSourceBase::VGetPtr(
+    const void *src, uint64_t ofs, size_t* len) {
+  const upb_bytesrc *base = static_cast<const upb_bytesrc*>(src);
+  return static_cast<const ByteSourceBase*>(base)->GetPtr(ofs, len);
+}
+
+}  // namespace upb
diff --git a/bindings/cpp/upb/bytestream.hpp b/bindings/cpp/upb/bytestream.hpp
new file mode 100644
index 0000000000..968d542c2a
--- /dev/null
+++ b/bindings/cpp/upb/bytestream.hpp
@@ -0,0 +1,238 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+//
+// This file defines the C++ wrappers for upb's byte-stream interfaces:
+// - upb::ByteSourceBase: base class for sources that vend streams of data.
+// - upb::ByteRegion: for reading from a specific region of a byte source;
+//   should be used by decoders instead of using a byte source directly.
+// - upb::StringSource: a byte source that reads from an in-memory string.
+//
+// These interfaces are used by streaming encoders and decoders: for example, a
+// protobuf parser gets its input from a upb::ByteRegion.  They are virtual
+// base classes so concrete implementations can get the data from a fd, a
+// FILE*, a string, etc.
+//
+// A ByteRegion represents a region of data from a ByteSource.
+//
+// Parsers get data from this interface instead of a bytesrc because we often
+// want to parse only a specific region of the input.  For example, if we parse
+// a string from our input but know that the string represents a protobuf, we
+// can pass its ByteRegion to an appropriate protobuf parser.
+//
+// Since the bytes may be coming from a file or network socket, bytes must be
+// fetched before they can be read (though in some cases this fetch may be a
+// no-op).  "fetch" is the only operation on a byteregion that could fail or
+// block, because it is the only operation that actually performs I/O.
+//
+// Bytes can be discarded when they are no longer needed.  Parsers should
+// always discard bytes they no longer need, both so the buffers can be freed
+// when possible and to give better visibility into what bytes the parser is
+// still using.
+//
+// start      discard                     read             fetch             end
+// ofs          ofs                       ofs               ofs              ofs
+// |             |--->Discard()            |                 |--->Fetch()      |
+// V             V                         V                 V                 V
+// +-------------+-------------------------+-----------------+-----------------+
+// |  discarded  |                         |                 |    fetchable    |
+// +-------------+-------------------------+-----------------+-----------------+
+//               | <------------- loaded ------------------> |
+//                                         | <- available -> |
+//                                         | <---------- remaining ----------> |
+//
+// Note that the start offset may be something other than zero!  A byteregion
+// is a view into an underlying bytesrc stream, and the region may start
+// somewhere other than the beginning of that stream.
+//
+// The region can be either delimited or nondelimited.  A non-delimited region
+// will keep returning data until the underlying data source returns EOF.  A
+// delimited region will return EOF at a predetermined offset.
+//
+//                       end
+//                       ofs
+//                         |
+//                         V
+// +-----------------------+
+// |  delimited region     |   <-- hard EOF, even if data source has more data.
+// +-----------------------+
+//
+// +------------------------
+// | nondelimited region   Z   <-- won't return EOF until data source hits EOF.
+// +------------------------
+
+#ifndef UPB_BYTESTREAM_HPP
+#define UPB_BYTESTREAM_HPP
+
+#include "upb/bytestream.h"
+#include "upb/upb.hpp"
+
+namespace upb {
+
+typedef upb_bytesuccess_t ByteSuccess;
+
+// Implement this interface to vend bytes to ByteRegions which will be used by
+// a decoder.
+class ByteSourceBase : public upb_bytesrc {
+ public:
+  ByteSourceBase() { upb_bytesrc_init(this, vtable()); }
+  virtual ~ByteSourceBase() { upb_bytesrc_uninit(this); }
+
+  // Fetches at least one byte starting at ofs, setting *len to the actual
+  // number of bytes fetched (or 0 on EOF or error: see return value for
+  // details).  It is valid for bytes to be fetched multiple times, as long as
+  // the bytes have not been previously discarded.
+  virtual ByteSuccess Fetch(uint64_t ofs, size_t* len) = 0;
+
+  // Discards all data prior to ofs (except data that is pinned, if pinning
+  // support is added -- see TODO below).
+  virtual void Discard(uint64_t ofs) = 0;
+
+  // Copies "len" bytes of data from ofs to "dst", which must be at least "len"
+  // bytes long.  The given region must not be discarded.
+  virtual void Copy(uint64_t ofs, size_t len, char *dst) const = 0;
+
+  // Returns a pointer to the bytesrc's internal buffer, storing in *len how
+  // much data is available.  The given offset must not be discarded.  The
+  // returned buffer is valid for as long as its bytes are not discarded (in
+  // the case that part of the returned buffer is discarded, only the
+  // non-discarded bytes remain valid).
+  virtual const char *GetPtr(uint64_t ofs, size_t *len) const = 0;
+
+  // TODO: Add if/when there is a demonstrated need:
+  //
+  // // When the caller pins a region (which must not be already discarded), it
+  // // is guaranteed that the region will not be discarded (nor will the
+  // // bytesrc be destroyed) until the region is unpinned.  However, not all
+  // // bytesrc's support pinning; a false return indicates that a pin was not
+  // // possible.
+  // virtual bool Pin(uint64_t ofs, size_t len);
+  //
+  // // Releases some number of pinned bytes from the beginning of a pinned
+  // // region (which may be fewer than the total number of bytes pinned).
+  // virtual void Unpin(uint64_t ofs, size_t len, size_t bytes_to_release);
+  //
+  // Adding pinning support would also involve adding a "pin_ofs" parameter to
+  // upb_bytesrc_fetch, so that the fetch can extend an already-pinned region.
+ private:
+  static upb_bytesrc_vtbl* vtable();
+  static upb_bytesuccess_t VFetch(void*, uint64_t, size_t*);
+  static void VDiscard(void*, uint64_t);
+  static void VCopy(const void*, uint64_t, size_t, char*);
+  static const char *VGetPtr(const void*, uint64_t, size_t*);
+};
+
+class ByteRegion : public upb_byteregion {
+ public:
+  static const uint64_t kNondelimited = UPB_NONDELIMITED;
+
+  ByteRegion() { upb_byteregion_init(this); }
+  ~ByteRegion() { upb_byteregion_uninit(this); }
+
+  // Accessors for the region's bounds -- the meaning of these is described in
+  // the diagram above.
+  uint64_t start_ofs() const { return upb_byteregion_startofs(this); }
+  uint64_t discard_ofs() const { return upb_byteregion_discardofs(this); }
+  uint64_t fetch_ofs() const { return upb_byteregion_fetchofs(this); }
+  uint64_t end_ofs() const { return upb_byteregion_endofs(this); }
+
+  // Returns how many bytes are fetched and available for reading starting from
+  // offset "offset".
+  uint64_t BytesAvailable(uint64_t offset) const {
+    return upb_byteregion_available(this, offset);
+  }
+
+  // Returns the total number of bytes remaining after offset "offset", or
+  // kNondelimited if the byteregion is non-delimited.
+  uint64_t BytesRemaining(uint64_t offset) const {
+    return upb_byteregion_remaining(this, offset);
+  }
+
+  uint64_t Length() const { return upb_byteregion_len(this); }
+
+  // Sets the value of this byteregion to be a subset of the given byteregion's
+  // data.  The caller is responsible for releasing this region before the src
+  // region is released (unless the region is first pinned, if pinning support
+  // is added; see below).
+  void Reset(const upb_byteregion *src, uint64_t ofs, uint64_t len) {
+    upb_byteregion_reset(this, src, ofs, len);
+  }
+  void Release() { upb_byteregion_release(this); }
+
+  // Attempts to fetch more data, extending the fetched range of this
+  // byteregion.  The returned ByteSuccess code reports whether the fetched
+  // region was extended or whether EOF or an error was encountered.
+  ByteSuccess Fetch() { return upb_byteregion_fetch(this); }
+
+  // Fetches all remaining data; the returned ByteSuccess code reports whether
+  // the operation failed.  May only be used on delimited byteregions.
+  ByteSuccess FetchAll() { return upb_byteregion_fetchall(this); }
+
+  // Discards bytes from the byteregion up until ofs (which must be greater or
+  // equal to discard_ofs()).  It is valid to discard bytes that have not been
+  // fetched (such bytes will never be fetched) but it is an error to discard
+  // past the end of a delimited byteregion.
+  void Discard(uint64_t ofs) { return upb_byteregion_discard(this, ofs); }
+
+  // Copies "len" bytes of data into "dst", starting at ofs.  The specified
+  // region must be available.
+  void Copy(uint64_t ofs, size_t len, char *dst) const {
+    upb_byteregion_copy(this, ofs, len, dst);
+  }
+
+  // Copies all bytes from the byteregion into dst.  Requires that the entire
+  // byteregion is fetched and that none has been discarded.
+  void CopyAll(char *dst) const {
+    upb_byteregion_copyall(this, dst);
+  }
+
+  // Returns a pointer to the internal buffer for the byteregion starting at
+  // offset "ofs." Stores the number of bytes available in this buffer in *len.
+  // The returned buffer is invalidated when the byteregion is reset or
+  // released, or when the bytes are discarded.  If the byteregion is not
+  // currently pinned, the pointer is only valid for the lifetime of the parent
+  // byteregion.
+  const char *GetPtr(uint64_t ofs, size_t *len) const {
+    return upb_byteregion_getptr(this, ofs, len);
+  }
+
+  // Copies the contents of the byteregion into a newly-allocated,
+  // NULL-terminated string.  Requires that the byteregion is fully fetched.
+  char *StrDup() const {
+    return upb_byteregion_strdup(this);
+  }
+
+  // TODO: add if/when there is a demonstrated need.
+  //
+  // // Pins this byteregion's bytes in memory, allowing it to outlive its
+  // // parent byteregion.  Normally a byteregion may only be used while its
+  // // parent is still valid, but a pinned byteregion may continue to be used
+  // // until it is reset or released.  A byteregion must be fully fetched to
+  // // be pinned (this implies that the byteregion must be delimited).
+  // //
+  // // In some cases this operation may cause the input data to be copied.
+  // //
+  // // void Pin();
+};
+
+class StringSource : public upb_stringsrc {
+ public:
+  StringSource() : upb_stringsrc() { upb_stringsrc_init(this); }
+  ~StringSource() { upb_stringsrc_uninit(this); }
+
+  void Reset(const char* data, size_t len) {
+    upb_stringsrc_reset(this, data, len);
+  }
+
+  ByteRegion* AllBytes() {
+    return static_cast<ByteRegion*>(upb_stringsrc_allbytes(this));
+  }
+
+  upb_bytesrc* ByteSource() { return upb_stringsrc_bytesrc(this); }
+};
+
+}  // namespace upb
+
+#endif
diff --git a/bindings/cpp/upb/def.hpp b/bindings/cpp/upb/def.hpp
index ac9aff1796..030ba40e86 100644
--- a/bindings/cpp/upb/def.hpp
+++ b/bindings/cpp/upb/def.hpp
@@ -1,42 +1,41 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2011 Google Inc.  See LICENSE for details.
- * Author: Josh Haberman <jhaberman@gmail.com>
- *
- * The set of upb::*Def classes and upb::SymbolTable allow for defining and
- * manipulating schema information (as defined in .proto files).
- *
- * Defs go through two distinct phases of life:
- *
- * 1. MUTABLE: when first created, the properties of the def can be set freely
- *    (for example a message's name, its list of fields, the name/number of
- *    fields, etc).  During this phase the def is *not* thread-safe, and may
- *    not be used for any purpose except to set its properties (it can't be
- *    used to parse anything, create any messages in memory, etc).
- *
- * 2. FINALIZED: after being added to a symtab (which links the defs together)
- *    the defs become finalized (thread-safe and immutable).  Programs may only
- *    access defs through a CONST POINTER during this stage -- upb_symtab will
- *    help you out with this requirement by only vending const pointers, but
- *    you need to make sure not to use any non-const pointers you still have
- *    sitting around.  In practice this means that you may not call any setters
- *    on the defs (or functions that themselves call the setters).  If you want
- *    to modify an existing immutable def, copy it with upb_*_dup(), modify the
- *    copy, and add the modified def to the symtab (replacing the existing
- *    def).
- *
- * You can test for which stage of life a def is in by calling
- * upb::Def::IsMutable().  This is particularly useful for dynamic language
- * bindings, which must properly guarantee that the dynamic language cannot
- * break the rules laid out above.
- *
- * It would be possible to make the defs thread-safe during stage 1 by using
- * mutexes internally and changing any methods returning pointers to return
- * copies instead.  This could be important if we are integrating with a VM or
- * interpreter that does not naturally serialize access to wrapped objects (for
- * example, in the case of Python this is not necessary because of the GIL).
- */
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+//
+// The set of upb::*Def classes and upb::SymbolTable allow for defining and
+// manipulating schema information (as defined in .proto files).
+//
+// Defs go through two distinct phases of life:
+//
+// 1. MUTABLE: when first created, the properties of the def can be set freely
+//    (for example a message's name, its list of fields, the name/number of
+//    fields, etc).  During this phase the def is *not* thread-safe, and may
+//    not be used for any purpose except to set its properties (it can't be
+//    used to parse anything, create any messages in memory, etc).
+//
+// 2. FINALIZED: after being added to a symtab (which links the defs together)
+//    the defs become finalized (thread-safe and immutable).  Programs may only
+//    access defs through a CONST POINTER during this stage -- upb_symtab will
+//    help you out with this requirement by only vending const pointers, but
+//    you need to make sure not to use any non-const pointers you still have
+//    sitting around.  In practice this means that you may not call any setters
+//    on the defs (or functions that themselves call the setters).  If you want
+//    to modify an existing immutable def, copy it with upb_*_dup(), modify the
+//    copy, and add the modified def to the symtab (replacing the existing
+//    def).
+//
+// You can test for which stage of life a def is in by calling
+// upb::Def::IsMutable().  This is particularly useful for dynamic language
+// bindings, which must properly guarantee that the dynamic language cannot
+// break the rules laid out above.
+//
+// It would be possible to make the defs thread-safe during stage 1 by using
+// mutexes internally and changing any methods returning pointers to return
+// copies instead.  This could be important if we are integrating with a VM or
+// interpreter that does not naturally serialize access to wrapped objects (for
+// example, in the case of Python this is not necessary because of the GIL).
 
 #ifndef UPB_DEF_HPP
 #define UPB_DEF_HPP
diff --git a/bindings/cpp/upb/handlers.hpp b/bindings/cpp/upb/handlers.hpp
index 07683f6130..d356a33de3 100644
--- a/bindings/cpp/upb/handlers.hpp
+++ b/bindings/cpp/upb/handlers.hpp
@@ -1,15 +1,14 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2011 Google Inc.  See LICENSE for details.
- * Author: Josh Haberman <jhaberman@gmail.com>
- *
- * upb::Handlers is a generic visitor-like interface for iterating over a
- * stream of protobuf data.  You can register function pointers that will be
- * called for each message and/or field as the data is being parsed or iterated
- * over, without having to know the source format that we are parsing from.
- * This decouples the parsing logic from the processing logic.
- */
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+//
+// upb::Handlers is a generic visitor-like interface for iterating over a
+// stream of protobuf data.  You can register function pointers that will be
+// called for each message and/or field as the data is being parsed or iterated
+// over, without having to know the source format that we are parsing from.
+// This decouples the parsing logic from the processing logic.
 
 #ifndef UPB_HANDLERS_HPP
 #define UPB_HANDLERS_HPP
@@ -18,6 +17,7 @@
 
 namespace upb {
 
+typedef upb_fieldtype_t FieldType;
 typedef upb_flow_t Flow;
 class MessageHandlers;
 
@@ -30,8 +30,8 @@ class FieldHandlers : public upb_fhandlers {
   // The FieldHandlers will live at least as long as the upb::Handlers to
   // which it belongs, but can be Ref'd/Unref'd to make it live longer (which
   // will prolong the life of the underlying upb::Handlers also).
-  void Ref()   const { upb_fhandlers_ref(this); }
-  void Unref() const { upb_fhandlers_unref(this); }
+  void Ref()   { upb_fhandlers_ref(this); }
+  void Unref() { upb_fhandlers_unref(this); }
 
   // Functions to set this field's handlers.
   // These return "this" so they can be conveniently chained, eg.
@@ -46,13 +46,13 @@ class FieldHandlers : public upb_fhandlers {
     upb_fhandlers_setstartseq(this, h); return this;
   }
   FieldHandlers* SetEndSequenceHandler(EndFieldHandler* h) {
-    upb_fhandlers_endseq(this, h); return this;
+    upb_fhandlers_setendseq(this, h); return this;
   }
   FieldHandlers* SetStartSubmessageHandler(StartFieldHandler* h) {
     upb_fhandlers_setstartsubmsg(this, h); return this;
   }
   FieldHandlers* SetEndSubmessageHandler(EndFieldHandler* h) {
-    upb_fhandlers_endsubmsg(this, h); return this;
+    upb_fhandlers_setendsubmsg(this, h); return this;
   }
 
   // Get/Set the field's bound value, which will be passed to its handlers.
@@ -62,27 +62,20 @@ class FieldHandlers : public upb_fhandlers {
   }
 
   // Returns the MessageHandlers to which we belong.
-  MessageHandlers* GetMessageHandlers() const {
-    return upb_fhandlers_msg(this);
-  }
-
+  MessageHandlers* GetMessageHandlers() const;
   // Returns the MessageHandlers for this field's submessage (invalid to call
   // unless this field's type UPB_TYPE(MESSAGE) or UPB_TYPE(GROUP).
-  MessageHandlers* GetSubMessageHandlers() const {
-    return upb_fhandlers_submsg(this);
-  }
-
+  MessageHandlers* GetSubMessageHandlers() const;
   // If set to >=0, the given hasbit will be set after the value callback is
-  // called (relative to the current closure).
-  int32_t GetValueHasbit() const { return upb_fhandler_valuehasbit(this); }
-  void SetValueHasbit(int32_t bit) { upb_fhandler_setvaluehasbit(this, bit); }
+  // called (offset relative to the current closure).
+  int32_t GetValueHasbit() const { return upb_fhandlers_getvaluehasbit(this); }
+  void SetValueHasbit(int32_t bit) { upb_fhandlers_setvaluehasbit(this, bit); }
 
  private:
   FieldHandlers();  // Only created by upb::Handlers.
   ~FieldHandlers(); // Only destroyed by refcounting.
 };
 
-
 class MessageHandlers : public upb_mhandlers {
  public:
   typedef upb_startmsg_handler StartMessageHandler;
@@ -91,8 +84,8 @@ class MessageHandlers : public upb_mhandlers {
   // The MessageHandlers will live at least as long as the upb::Handlers to
   // which it belongs, but can be Ref'd/Unref'd to make it live longer (which
   // will prolong the life of the underlying upb::Handlers also).
-  void Ref()   const { upb_mhandlers_ref(this); }
-  void Unref() const { upb_mhandlers_unref(this); }
+  void Ref()    { upb_mhandlers_ref(this); }
+  void Unref()  { upb_mhandlers_unref(this); }
 
   // Functions to set this message's handlers.
   // These return "this" so they can be conveniently chained, eg.
@@ -107,12 +100,10 @@ class MessageHandlers : public upb_mhandlers {
   }
 
   // Functions to create new FieldHandlers for this message.
-  FieldHandlers* NewFieldHandlers(uint32_t fieldnum, upb_fieldtype_t type,
+  FieldHandlers* NewFieldHandlers(uint32_t fieldnum, FieldType type,
                                   bool repeated) {
-    return upb_mhandlers_newfhandlers(this, fieldnum, type, repeated);
-  }
-  FieldHandlers* NewFieldHandlers(FieldDef* f) {
-    return upb_mhandlers_newfhandlers_fordef(f);
+    return static_cast<FieldHandlers*>(
+        upb_mhandlers_newfhandlers(this, fieldnum, type, repeated));
   }
 
   // Like the previous but for MESSAGE or GROUP fields.  For GROUP fields, the
@@ -120,15 +111,10 @@ class MessageHandlers : public upb_mhandlers {
   FieldHandlers* NewFieldHandlersForSubmessage(uint32_t n, const char *name,
                                                FieldType type, bool repeated,
                                                MessageHandlers* subm) {
-    return upb_mhandlers_newsubmsgfhandlers(this, n, type, repeated, subm);
-  }
-
-  FieldHandlers* NewFieldHandlersForSubmessage(FieldDef* f,
-                                               MessageHandlers* subm) {
-    return upb_mhandlers_newsubmsgfhandlers_fordef(f);
+    return static_cast<FieldHandlers*>(
+        upb_mhandlers_newfhandlers_subm(this, n, type, repeated, subm));
   }
 
-
  private:
   MessageHandlers();  // Only created by upb::Handlers.
   ~MessageHandlers(); // Only destroyed by refcounting.
@@ -137,26 +123,31 @@ class MessageHandlers : public upb_mhandlers {
 class Handlers : public upb_handlers {
  public:
   // Creates a new Handlers instance.
-  Handlers* New() { return static_cast<Handlers*>(upb_handlers_new()); }
+  static Handlers* New() { return static_cast<Handlers*>(upb_handlers_new()); }
 
   void Ref()   { upb_handlers_ref(this); }
   void Unref() { upb_handlers_unref(this); }
 
   // Returns a new MessageHandlers object.  The first such message that is
   // obtained will be the top-level message for this Handlers object.
-  MessageHandlers* NewMessageHandlers() { return upb_handlers_newmhandlers(this); }
-
-  // Freezes the handlers against future modification.  Handlers must be
-  // finalized before they can be passed to a data producer.  After Finalize()
-  // has been called, you may only call const methods on the Handlers and its
-  // MessageHandlers/FieldHandlers.
-  void Finalize() { upb_handlers_finalize(this); }
+  MessageHandlers* NewMessageHandlers() {
+    return static_cast<MessageHandlers*>(upb_handlers_newmhandlers(this));
+  }
 
  private:
-  FieldHandlers();  // Only created by Handlers::New().
-  ~FieldHandlers(); // Only destroyed by refcounting.
+  Handlers();  // Only created by Handlers::New().
+  ~Handlers(); // Only destroyed by refcounting.
 };
 
+// Defined after MessageHandlers is complete; inline since this is a header.
+inline MessageHandlers* FieldHandlers::GetMessageHandlers() const {
+  return static_cast<MessageHandlers*>(upb_fhandlers_getmsg(this));
+}
+
+inline MessageHandlers* FieldHandlers::GetSubMessageHandlers() const {
+  return static_cast<MessageHandlers*>(upb_fhandlers_getsubmsg(this));
+}
+
 }  // namespace upb
 
 #endif
diff --git a/bindings/cpp/upb/pb/decoder.hpp b/bindings/cpp/upb/pb/decoder.hpp
new file mode 100644
index 0000000000..05bcb8a787
--- /dev/null
+++ b/bindings/cpp/upb/pb/decoder.hpp
@@ -0,0 +1,83 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+//
+// upb::Decoder is a high performance, streaming decoder for protobuf
+// data that works by getting its input data from a upb::ByteRegion and calling
+// into a upb::Handlers.
+//
+// A DecoderPlan contains whatever data structures and generated (JIT-ted) code
+// are necessary to decode protobuf data of a specific type to a specific set
+// of handlers.  By generating the plan ahead of time, we avoid having to
+// redo this work every time we decode.
+//
+// A DecoderPlan is threadsafe, meaning that it can be used concurrently by
+// different upb::Decoders in different threads.  However, the upb::Decoders are
+// *not* thread-safe.
+
+#ifndef UPB_PB_DECODER_HPP
+#define UPB_PB_DECODER_HPP
+
+#include "upb/pb/decoder.h"
+
+#include "upb/bytestream.hpp"
+#include "upb/upb.hpp"
+
+namespace upb {
+
+class DecoderPlan : public upb_decoderplan {
+ public:
+  static DecoderPlan* New(Handlers* h, bool allow_jit) {
+    return static_cast<DecoderPlan*>(upb_decoderplan_new(h, allow_jit));
+  }
+  void Unref() { upb_decoderplan_unref(this); }
+
+  // Returns true if the plan contains JIT-ted code.  This may not be the same
+  // as the "allow_jit" parameter passed to New() if support for JIT-ting was
+  // not compiled in.
+  bool HasJitCode() { return upb_decoderplan_hasjitcode(this); }
+
+ private:
+  DecoderPlan() {}  // Only constructed by New
+};
+
+class Decoder : public upb_decoder {
+ public:
+  Decoder() { upb_decoder_init(this); }
+  ~Decoder() { upb_decoder_uninit(this); }
+
+  // Resets the plan that the decoder will parse from.  This will also reset the
+  // decoder's input to be uninitialized -- ResetInput() must be called before
+  // parsing can occur.  The plan must live until the decoder is destroyed or
+  // reset to a different plan.
+  //
+  // Must be called before ResetInput() or Decode().
+  void ResetPlan(DecoderPlan* plan, int32_t msg_offset) {
+    upb_decoder_resetplan(this, plan, msg_offset);
+  }
+
+  // Resets the input of the decoder.  This puts it in a state where it has not
+  // seen any data, and expects the next data to be from the beginning of a new
+  // protobuf.
+  //
+  // ResetInput() must be called before Decode() but may be called more than
+  // once.  "byte_region" must live until the decoder is destroyed or
+  // ResetInput() is called again.  "c" is the closure passed to the handlers.
+  void ResetInput(ByteRegion* byte_region, void* c) {
+    upb_decoder_resetinput(this, byte_region, c);
+  }
+
+  // Decodes serialized data (calling Handlers as the data is parsed) until
+  // error or EOF (see status() for details).
+  Success Decode() { return upb_decoder_decode(this); }
+
+  const upb::Status& status() {
+    return static_cast<const upb::Status&>(*upb_decoder_status(this));
+  }
+};
+
+}  // namespace upb
+
+#endif
diff --git a/bindings/cpp/upb/upb.hpp b/bindings/cpp/upb/upb.hpp
index 4fb337dd9c..226859c459 100644
--- a/bindings/cpp/upb/upb.hpp
+++ b/bindings/cpp/upb/upb.hpp
@@ -1,23 +1,34 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2011 Google Inc.  See LICENSE for details.
- * Author: Josh Haberman <jhaberman@gmail.com>
- */
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
 
 #ifndef UPB_HPP
 #define UPB_HPP
 
 #include "upb/upb.h"
+#include <iostream>
 
 namespace upb {
 
+typedef upb_success_t Success;
+
 class Status : public upb_status {
  public:
   Status() { upb_status_init(this); }
   ~Status() { upb_status_uninit(this); }
 
+  bool ok() const { return upb_ok(this); }
+  bool eof() const { return upb_eof(this); }
+
   const char *GetString() const { return upb_status_getstr(this); }
+  void SetEof() { upb_status_seteof(this); }
+  void SetErrorLiteral(const char* msg) {
+    upb_status_seterrliteral(this, msg);
+  }
+
+  void Clear() { upb_status_clear(this); }
 };
 
 class Value : public upb_value {
diff --git a/examples/stream_transcode.c b/examples/stream_transcode.c
new file mode 100644
index 0000000000..21c375bce6
--- /dev/null
+++ b/examples/stream_transcode.c
@@ -0,0 +1,76 @@
+
+#include <stdlib.h>
+#include "upb/bytestream.h"
+#include "upb/pb/decoder.h"
+#include "upb/pb/glue.h"
+#include "upb/pb/textprinter.h"
+
+int main(int argc, char *argv[]) {
+  if (argc < 3) {
+    fprintf(stderr, "Usage: stream_transcode <descfile> <msgname>\n");
+    return 1;
+  }
+
+  upb_symtab *symtab = upb_symtab_new();
+  size_t desc_len;
+  const char *desc = upb_readfile(argv[1], &desc_len);
+  if (!desc) {
+    fprintf(stderr, "Couldn't open descriptor file: %s\n", argv[1]);
+    return 1;
+  }
+
+  upb_status status = UPB_STATUS_INIT;
+  upb_load_descriptor_into_symtab(symtab, desc, desc_len, &status);
+  if (!upb_ok(&status)) {
+    fprintf(stderr, "Error parsing descriptor: %s", upb_status_getstr(&status));
+    return 1;
+  }
+  free((void*)desc);
+
+  const upb_def *md = upb_symtab_lookup(symtab, argv[2]);
+  if (!md) {
+    fprintf(stderr, "Descriptor did not contain message: %s\n", argv[2]);
+    return 1;
+  }
+
+  const upb_msgdef *m = upb_dyncast_msgdef_const(md);
+  if (!m) {
+    fprintf(stderr, "Def was not a msgdef.\n");
+    return 1;
+  }
+
+  upb_stdio in, out;
+  upb_stdio_init(&in);
+  upb_stdio_init(&out);
+  upb_stdio_reset(&in, stdin);
+  upb_stdio_reset(&out, stdout);
+
+  upb_handlers *handlers = upb_handlers_new();
+  upb_textprinter *p = upb_textprinter_new();
+  upb_textprinter_reset(p, upb_stdio_bytesink(&out), false);
+  upb_textprinter_reghandlers(handlers, m);
+
+  upb_decoder d;
+  upb_decoder_init(&d, handlers);
+  upb_decoder_reset(&d, upb_stdio_bytesrc(&in), 0, UPB_NONDELIMITED, p);
+
+  upb_status_clear(&status);
+  upb_decoder_decode(&d, &status);
+
+  if (!upb_ok(&status)) {
+    fprintf(stderr, "Error parsing input: %s", upb_status_getstr(&status));
+  }
+
+  upb_status_uninit(&status);
+  upb_stdio_uninit(&in);
+  upb_stdio_uninit(&out);
+  upb_decoder_uninit(&d);
+  upb_textprinter_free(p);
+  upb_def_unref(UPB_UPCAST(m));
+  upb_symtab_unref(symtab);
+
+  // Prevent C library from holding buffers open, so Valgrind doesn't see
+  // memory leaks.
+  fclose(stdin);
+  fclose(stdout);
+}
diff --git a/tests/test_cpp.cc b/tests/test_cpp.cc
index ecf27bf14c..5182217725 100644
--- a/tests/test_cpp.cc
+++ b/tests/test_cpp.cc
@@ -9,7 +9,11 @@
 
 #include <stdio.h>
 #include <iostream>
+#include "upb/bytestream.hpp"
 #include "upb/def.hpp"
+#include "upb/handlers.hpp"
+#include "upb/upb.hpp"
+#include "upb/pb/decoder.hpp"
 #include "upb/pb/glue.hpp"
 
 static void TestSymbolTable(const char *descriptor_file) {
@@ -26,11 +30,22 @@ static void TestSymbolTable(const char *descriptor_file) {
   md->Unref();
 }
 
+static void TestByteStream() {
+  upb::StringSource stringsrc;
+  stringsrc.Reset("testing", 7);
+  upb::ByteRegion* byteregion = stringsrc.AllBytes();
+  assert(byteregion->FetchAll() == UPB_BYTE_OK);
+  char* str = byteregion->StrDup();
+  assert(strcmp(str, "testing") == 0);
+  free(str);
+}
+
 int main(int argc, char *argv[]) {
   if (argc < 2) {
     fprintf(stderr, "Usage: test_cpp <descriptor file>\n");
     return 1;
   }
   TestSymbolTable(argv[1]);
+  TestByteStream();
   return 0;
 }
diff --git a/tests/test_decoder.c b/tests/test_decoder.c
index 84a90cdee6..0db3bfa0d3 100644
--- a/tests/test_decoder.c
+++ b/tests/test_decoder.c
@@ -1,76 +1,666 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc.  See LICENSE for details.
+ *
+ * An exhaustive set of tests for parsing both valid and invalid protobuf
+ * input, with buffer breaks in arbitrary places.
+ *
+ * Tests to add:
+ * - unknown field handler called appropriately
+ * - unknown fields can be inserted in random places
+ * - fuzzing of valid input
+ * - resource limits (max stack depth, max string len)
+ * - testing of groups
+ * - more thorough testing of sequences
+ * - test skipping of submessages
+ * - test suspending the decoder
+ * - buffers that are close enough to the end of the address space that
+ *   pointers overflow (this might be difficult).
+ * - a few "kitchen sink" examples (one proto that uses all types, lots
+ *   of submsg/sequences, etc.)
+ */
 
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdint.h>
 #include <stdlib.h>
-#include "upb/bytestream.h"
+#include <string.h>
+#include "upb/handlers.h"
 #include "upb/pb/decoder.h"
-#include "upb/pb/glue.h"
-#include "upb/pb/textprinter.h"
+#include "upb/pb/varint.h"
+#include "upb/upb.h"
+#include "upb_test.h"
 
-int main(int argc, char *argv[]) {
-  if (argc < 3) {
-    fprintf(stderr, "Usage: test_decoder <descfile> <msgname>\n");
-    return 1;
+typedef struct {
+  char *buf;
+  size_t len;
+} buffer;
+
+// Returns a new buffer of "len" bytes, all initialized to zero.
+buffer *buffer_new(size_t len) {
+  buffer *buf = malloc(sizeof(*buf));
+  buf->buf = malloc(len);
+  buf->len = len;
+  memset(buf->buf, 0, buf->len);
+  return buf;
+}
+
+buffer *buffer_new2(const void *data, size_t len) {
+  buffer *buf = buffer_new(len);
+  memcpy(buf->buf, data, len);
+  return buf;
+}
+
+buffer *buffer_new3(const char *data) {
+  return buffer_new2(data, strlen(data));
+}
+
+buffer *buffer_dup(buffer *buf) { return buffer_new2(buf->buf, buf->len); }
+
+void buffer_free(buffer *buf) {
+  free(buf->buf);
+  free(buf);
+}
+
+void buffer_appendf(buffer *buf, const char *fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+  size_t size = buf->len;
+  buf->len += upb_vrprintf(&buf->buf, &size, buf->len, fmt, args);
+  va_end(args);
+}
+
+void buffer_cat(buffer *buf, buffer *buf2) {
+  size_t newlen = buf->len + buf2->len;
+  buf->buf = realloc(buf->buf, newlen);
+  memcpy(buf->buf + buf->len, buf2->buf, buf2->len);
+  buf->len = newlen;
+  buffer_free(buf2);
+}
+
+bool buffer_eql(buffer *buf, buffer *buf2) {
+  return buf->len == buf2->len && memcmp(buf->buf, buf2->buf, buf->len) == 0;
+}
+
+
+/* Routines for building arbitrary protos *************************************/
+
+buffer *cat(buffer *arg1, ...) {
+  va_list ap;
+  buffer *arg;
+  va_start(ap, arg1);
+  while ((arg = va_arg(ap, buffer*)) != NULL) {
+    buffer_cat(arg1, arg);
   }
+  va_end(ap);
+  return arg1;
+}
+
+buffer *varint(uint64_t x) {
+  buffer *buf = buffer_new(UPB_PB_VARINT_MAX_LEN + 1);
+  buf->len = upb_vencode64(x, buf->buf);
+  return buf;
+}
+
+// TODO: proper byte-swapping for big-endian machines.
+buffer *fixed32(void *data) { return buffer_new2(data, 4); }
+buffer *fixed64(void *data) { return buffer_new2(data, 8); }
+
+buffer *delim(buffer *buf) { return cat( varint(buf->len), buf, NULL ); }
+buffer *uint32(uint32_t u32) { return fixed32(&u32); }
+buffer *uint64(uint64_t u64) { return fixed64(&u64); }
+buffer *flt(float f) { return fixed32(&f); }
+buffer *dbl(double d) { return fixed64(&d); }
+buffer *zz32(int32_t x) { return varint(upb_zzenc_32(x)); }
+buffer *zz64(int64_t x) { return varint(upb_zzenc_64(x)); }
+
+buffer *tag(uint32_t fieldnum, char wire_type) {
+  return varint((fieldnum << 3) | wire_type);
+}
+
+buffer *submsg(uint32_t fn, buffer *buf) {
+  return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), delim(buf), NULL );
+}
 
-  upb_symtab *symtab = upb_symtab_new();
-  size_t desc_len;
-  const char *desc = upb_readfile(argv[1], &desc_len);
-  if (!desc) {
-    fprintf(stderr, "Couldn't open descriptor file: %s\n", argv[1]);
-    return 1;
+
+/* A set of handlers that covers all .proto types *****************************/
+
+// The handlers simply append to a string indicating what handlers were called.
+// This string is similar to protobuf text format but fields are referred to by
+// number instead of name and sequences are explicitly delimited.
+
+#define VALUE_HANDLER(member, fmt) \
+  upb_flow_t value_ ## member(void *closure, upb_value fval, upb_value val) { \
+    buffer_appendf(closure, "%" PRIu32 ":%" fmt "; ",                         \
+                   upb_value_getuint32(fval), upb_value_get ## member(val));  \
+    return UPB_CONTINUE;                                                      \
   }
 
-  upb_status status = UPB_STATUS_INIT;
-  upb_load_descriptor_into_symtab(symtab, desc, desc_len, &status);
-  if (!upb_ok(&status)) {
-    fprintf(stderr, "Error parsing descriptor: %s", upb_status_getstr(&status));
-    return 1;
+VALUE_HANDLER(uint32, PRIu32)
+VALUE_HANDLER(uint64, PRIu64)
+VALUE_HANDLER(int32, PRId32)
+VALUE_HANDLER(int64, PRId64)
+VALUE_HANDLER(float, "g")
+VALUE_HANDLER(double, "g")
+
+upb_flow_t value_bool(void *closure, upb_value fval, upb_value val) {
+  buffer_appendf(closure, "%" PRIu32 ":%s; ",
+                 upb_value_getuint32(fval),
+                 upb_value_getbool(val) ? "true" : "false");
+  return UPB_CONTINUE;
+}
+
+upb_flow_t value_string(void *closure, upb_value fval, upb_value val) {
+  // Note: won't work with strings that contain embedded NUL bytes.
+  char *str = upb_byteregion_strdup(upb_value_getbyteregion(val));
+  buffer_appendf(closure, "%" PRIu32 ":%s; ", upb_value_getuint32(fval), str);
+  free(str);
+  return UPB_CONTINUE;
+}
+
+upb_sflow_t startsubmsg(void *closure, upb_value fval) {
+  buffer_appendf(closure, "%" PRIu32 ":{ ", upb_value_getuint32(fval));
+  return UPB_CONTINUE_WITH(closure);
+}
+
+upb_flow_t endsubmsg(void *closure, upb_value fval) {
+  buffer_appendf(closure, "} ");
+  return UPB_CONTINUE;
+}
+
+upb_sflow_t startseq(void *closure, upb_value fval) {
+  buffer_appendf(closure, "%" PRIu32 ":[ ", upb_value_getuint32(fval));
+  return UPB_CONTINUE_WITH(closure);
+}
+
+upb_flow_t endseq(void *closure, upb_value fval) {
+  buffer_appendf(closure, "] ");
+  return UPB_CONTINUE;
+}
+
+void doreg(upb_mhandlers *m, uint32_t num, upb_fieldtype_t type, bool repeated,
+           upb_value_handler *handler) {
+  upb_fhandlers *f = upb_mhandlers_newfhandlers(m, num, type, repeated);
+  ASSERT(f);
+  upb_fhandlers_setvalue(f, handler);
+  upb_fhandlers_setstartseq(f, &startseq);
+  upb_fhandlers_setendseq(f, &endseq);
+  upb_fhandlers_setfval(f, upb_value_uint32(num));
+}
+
+// Returns the repeated field number that corresponds to the given
+// non-repeated field number.
+uint32_t rep_fn(uint32_t fn) {
+  return (UPB_MAX_FIELDNUMBER - 1000) + fn;
+}
+
+#define NOP_FIELD 40
+#define UNKNOWN_FIELD 666
+
+void reg(upb_mhandlers *m, upb_fieldtype_t type, upb_value_handler *handler) {
+  // We register both a repeated and a non-repeated field for every type.
+  // For the non-repeated field we make the field number the same as the
+  // type.  For the repeated field we make it a function of the type.
+  doreg(m, type, type, false, handler);
+  doreg(m, rep_fn(type), type, true, handler);
+}
+
+void reg_subm(upb_mhandlers *m, uint32_t num, upb_fieldtype_t type,
+              bool repeated) {
+  upb_fhandlers *f =
+      upb_mhandlers_newfhandlers_subm(m, num, type, repeated, m);
+  ASSERT(f);
+  upb_fhandlers_setstartseq(f, &startseq);
+  upb_fhandlers_setendseq(f, &endseq);
+  upb_fhandlers_setstartsubmsg(f, &startsubmsg);
+  upb_fhandlers_setendsubmsg(f, &endsubmsg);
+  upb_fhandlers_setfval(f, upb_value_uint32(num));
+}
+
+void reghandlers(upb_mhandlers *m) {
+  // Register handlers for each type.
+  reg(m, UPB_TYPE(DOUBLE),   &value_double);
+  reg(m, UPB_TYPE(FLOAT),    &value_float);
+  reg(m, UPB_TYPE(INT64),    &value_int64);
+  reg(m, UPB_TYPE(UINT64),   &value_uint64);
+  reg(m, UPB_TYPE(INT32) ,   &value_int32);
+  reg(m, UPB_TYPE(FIXED64),  &value_uint64);
+  reg(m, UPB_TYPE(FIXED32),  &value_uint32);
+  reg(m, UPB_TYPE(BOOL),     &value_bool);
+  reg(m, UPB_TYPE(STRING),   &value_string);
+  reg(m, UPB_TYPE(BYTES),    &value_string);
+  reg(m, UPB_TYPE(UINT32),   &value_uint32);
+  reg(m, UPB_TYPE(ENUM),     &value_int32);
+  reg(m, UPB_TYPE(SFIXED32), &value_int32);
+  reg(m, UPB_TYPE(SFIXED64), &value_int64);
+  reg(m, UPB_TYPE(SINT32),   &value_int32);
+  reg(m, UPB_TYPE(SINT64),   &value_int64);
+
+  // Register submessage/group handlers that are self-recursive
+  // to this type, eg: message M { optional M m = 1; }
+  reg_subm(m, UPB_TYPE(MESSAGE),         UPB_TYPE(MESSAGE), false);
+  reg_subm(m, UPB_TYPE(GROUP),           UPB_TYPE(GROUP),   false);
+  reg_subm(m, rep_fn(UPB_TYPE(MESSAGE)), UPB_TYPE(MESSAGE), true);
+  reg_subm(m, rep_fn(UPB_TYPE(GROUP)),   UPB_TYPE(GROUP),   true);
+
+  // Register a no-op string field so we can pad the proto wherever we want.
+  upb_mhandlers_newfhandlers(m, NOP_FIELD, UPB_TYPE(STRING), false);
+}
+
+
+/* Custom bytesrc that can insert buffer seams in arbitrary places ************/
+
+typedef struct {
+  upb_bytesrc bytesrc;
+  const char *str;
+  size_t len, seam1, seam2;
+  upb_byteregion byteregion;
+} upb_seamsrc;
+
+size_t upb_seamsrc_avail(const upb_seamsrc *src, size_t ofs) {
+  if (ofs < src->seam1) return src->seam1 - ofs;
+  if (ofs < src->seam2) return src->seam2 - ofs;
+  return src->len - ofs;
+}
+
+upb_bytesuccess_t upb_seamsrc_fetch(void *_src, uint64_t ofs, size_t *read) {
+  upb_seamsrc *src = _src;
+  assert(ofs <= src->len);  // ofs == len is a valid request: it reports EOF.
+  if (ofs == src->len) {
+    upb_status_seteof(&src->bytesrc.status);
+    return UPB_BYTE_EOF;
   }
-  free((void*)desc);
+  *read = upb_seamsrc_avail(src, ofs);
+  return UPB_BYTE_OK;
+}
+
+void upb_seamsrc_copy(const void *_src, uint64_t ofs,
+                      size_t len, char *dst) {
+  const upb_seamsrc *src = _src;
+  assert(ofs + len <= src->len);
+  memcpy(dst, src->str + ofs, len);
+}
+
+void upb_seamsrc_discard(void *src, uint64_t ofs) {
+  (void)src;
+  (void)ofs;
+}
+
+const char *upb_seamsrc_getptr(const void *_s, uint64_t ofs, size_t *len) {
+  const upb_seamsrc *src = _s;
+  *len = upb_seamsrc_avail(src, ofs);
+  return src->str + ofs;
+}
 
-  const upb_def *md = upb_symtab_lookup(symtab, argv[2]);
-  if (!md) {
-    fprintf(stderr, "Descriptor did not contain message: %s\n", argv[2]);
-    return 1;
+void upb_seamsrc_init(upb_seamsrc *s, const char *str, size_t len) {
+  static upb_bytesrc_vtbl vtbl = {
+    &upb_seamsrc_fetch,
+    &upb_seamsrc_discard,
+    &upb_seamsrc_copy,
+    &upb_seamsrc_getptr,
+  };
+  upb_bytesrc_init(&s->bytesrc, &vtbl);
+  s->seam1 = 0;
+  s->seam2 = 0;
+  s->str = str;
+  s->len = len;
+  s->byteregion.bytesrc = &s->bytesrc;
+  s->byteregion.toplevel = true;
+  s->byteregion.start = 0;
+  s->byteregion.end = len;
+}
+
+void upb_seamsrc_resetseams(upb_seamsrc *s, size_t seam1, size_t seam2) {
+  ASSERT(seam1 <= seam2);
+  s->seam1 = seam1;
+  s->seam2 = seam2;
+  s->byteregion.discard = 0;
+  s->byteregion.fetch = 0;
+}
+
+void upb_seamsrc_uninit(upb_seamsrc *s) { (void)s; }
+
+upb_bytesrc *upb_seamsrc_bytesrc(upb_seamsrc *s) {
+  return &s->bytesrc;
+}
+
+// Returns the top-level upb_byteregion* for this seamsrc.  Invalidated when
+// the seamsrc is reset.
+upb_byteregion *upb_seamsrc_allbytes(upb_seamsrc *s) {
+  return &s->byteregion;
+}
+
+
+/* Running of test cases ******************************************************/
+
+upb_decoderplan *plan;
+
+void run_decoder(buffer *proto, buffer *expected_output) {
+  upb_seamsrc src;
+  upb_seamsrc_init(&src, proto->buf, proto->len);
+  upb_decoder d;
+  upb_decoder_init(&d);
+  upb_decoder_resetplan(&d, plan, 0);
+  for (size_t i = 0; i < proto->len; i++) {
+    for (size_t j = i; j < proto->len; j++) {
+      upb_seamsrc_resetseams(&src, i, j);
+      upb_byteregion *input = upb_seamsrc_allbytes(&src);
+      buffer *output = buffer_new(0);
+      upb_decoder_resetinput(&d, input, output);
+      upb_success_t success = UPB_SUSPENDED;
+      while (success == UPB_SUSPENDED)
+        success = upb_decoder_decode(&d);
+      ASSERT(upb_ok(upb_decoder_status(&d)) == (success == UPB_OK));
+      if (expected_output) {
+        ASSERT(success == UPB_OK);
+        // The input should be fully consumed.
+        ASSERT(upb_byteregion_fetchofs(input) == upb_byteregion_endofs(input));
+        ASSERT(upb_byteregion_discardofs(input) ==
+               upb_byteregion_endofs(input));
+        if (!buffer_eql(output, expected_output)) {
+          fprintf(stderr, "Text mismatch: '%s' vs '%s'\n",
+                  output->buf, expected_output->buf);
+        }
+        ASSERT(strcmp(output->buf, expected_output->buf) == 0);
+      } else {
+        ASSERT(success == UPB_ERROR);
+      }
+      buffer_free(output);
+    }
   }
+  upb_seamsrc_uninit(&src);
+  upb_decoder_uninit(&d);
+  buffer_free(proto);
+}
+
+void assert_successful_parse_at_eof(buffer *proto, const char *expected_fmt,
+                                    va_list args) {
+  buffer *expected_text = buffer_new(0);
+  size_t size = expected_text->len;
+  expected_text->len += upb_vrprintf(&expected_text->buf, &size,
+                                     expected_text->len, expected_fmt, args);
+  run_decoder(proto, expected_text);
+  buffer_free(expected_text);
+}
+
+void assert_does_not_parse_at_eof(buffer *proto) {
+  run_decoder(proto, NULL);
+}
+
+void assert_successful_parse(buffer *proto, const char *expected_fmt, ...) {
+  // The JIT is only used for data >=20 bytes from end-of-buffer, so
+  // repeat once with no-op padding data at the end of buffer.
+  va_list args, args2;
+  va_start(args, expected_fmt);
+  va_copy(args2, args);
+  assert_successful_parse_at_eof(buffer_dup(proto), expected_fmt, args);
+  assert_successful_parse_at_eof(
+      cat( proto,
+           tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim(buffer_new(30)),
+           NULL ),
+      expected_fmt, args2);
+  va_end(args);
+  va_end(args2);
+}
+
+void assert_does_not_parse(buffer *proto) {
+  // The JIT is only used for data >=20 bytes from end-of-buffer, so
+  // repeat once with no-op padding data at the end of buffer.
+  assert_does_not_parse_at_eof(buffer_dup(proto));
+  assert_does_not_parse_at_eof(
+      cat( proto,
+           tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim( buffer_new(30)),
+           NULL ));
+}
+
+
+/* The actual tests ***********************************************************/
+
+void test_premature_eof_for_type(upb_fieldtype_t type) {
+  // Incomplete values for each wire type.
+  static const char *incompletes[] = {
+    "\x80",    // UPB_WIRE_TYPE_VARINT
+    "abcdefg", // UPB_WIRE_TYPE_64BIT
+    "\x80",    // UPB_WIRE_TYPE_DELIMITED (partial length)
+    NULL,      // UPB_WIRE_TYPE_START_GROUP (no value required)
+    NULL,      // UPB_WIRE_TYPE_END_GROUP (no value required)
+    "abc"      // UPB_WIRE_TYPE_32BIT
+  };
+
+  uint32_t fieldnum = type;
+  uint32_t rep_fieldnum = rep_fn(type);
+  int wire_type = upb_types[type].native_wire_type;
+  const char *incomplete = incompletes[wire_type];
+
+  // EOF before a known non-repeated value.
+  assert_does_not_parse_at_eof(tag(fieldnum, wire_type));
+
+  // EOF before a known repeated value.
+  assert_does_not_parse_at_eof(tag(rep_fieldnum, wire_type));
+
+  // EOF before an unknown value.
+  assert_does_not_parse_at_eof(tag(UNKNOWN_FIELD, wire_type));
+
+  // EOF inside a known non-repeated value.
+  assert_does_not_parse_at_eof(
+      cat( tag(fieldnum, wire_type), buffer_new3(incomplete), NULL ));
+
+  // EOF inside a known repeated value.
+  assert_does_not_parse_at_eof(
+      cat( tag(rep_fieldnum, wire_type), buffer_new3(incomplete), NULL ));
+
+  // EOF inside an unknown value.
+  assert_does_not_parse_at_eof(
+      cat( tag(UNKNOWN_FIELD, wire_type), buffer_new3(incomplete), NULL ));
+
+  if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
+    // EOF in the middle of delimited data for known non-repeated value.
+    assert_does_not_parse_at_eof(
+        cat( tag(fieldnum, wire_type), varint(1), NULL ));
+
+    // EOF in the middle of delimited data for known repeated value.
+    assert_does_not_parse_at_eof(
+        cat( tag(rep_fieldnum, wire_type), varint(1), NULL ));
 
-  const upb_msgdef *m = upb_dyncast_msgdef_const(md);
-  if (!m) {
-    fprintf(stderr, "Def was not a msgdef.\n");
-    return 1;
+    // EOF in the middle of delimited data for unknown value.
+    assert_does_not_parse_at_eof(
+        cat( tag(UNKNOWN_FIELD, wire_type), varint(1), NULL ));
+
+    if (type == UPB_TYPE(MESSAGE)) {
+      // Submessage ends in the middle of a value.
+      buffer *incomplete_submsg =
+          cat ( tag(UPB_TYPE(INT32), UPB_WIRE_TYPE_VARINT),
+                buffer_new3(incompletes[UPB_WIRE_TYPE_VARINT]), NULL );
+      assert_does_not_parse(
+          cat( tag(fieldnum, UPB_WIRE_TYPE_DELIMITED),
+               varint(incomplete_submsg->len),
+               incomplete_submsg, NULL ));
+    }
+  } else {
+    // Packed region ends in the middle of a value.
+    assert_does_not_parse(
+        cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
+             varint(strlen(incomplete)),
+             buffer_new3(incomplete), NULL ));
+
+    // EOF in the middle of packed region.
+    assert_does_not_parse_at_eof(
+        cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED), varint(1), NULL ));
   }
+}
 
-  upb_stdio in, out;
-  upb_stdio_init(&in);
-  upb_stdio_init(&out);
-  upb_stdio_reset(&in, stdin);
-  upb_stdio_reset(&out, stdout);
+// "33" and "66" are just two random values that all numeric types can
+// represent.
+void test_valid_data_for_type(upb_fieldtype_t type,
+                              buffer *enc33, buffer *enc66) {
+  uint32_t fieldnum = type;
+  uint32_t rep_fieldnum = rep_fn(type);
+  int wire_type = upb_types[type].native_wire_type;
 
-  upb_handlers *handlers = upb_handlers_new();
-  upb_textprinter *p = upb_textprinter_new();
-  upb_textprinter_reset(p, upb_stdio_bytesink(&out), false);
-  upb_textprinter_reghandlers(handlers, m);
+  // Non-repeated
+  assert_successful_parse(
+      cat( tag(fieldnum, wire_type), buffer_dup(enc33),
+           tag(fieldnum, wire_type), buffer_dup(enc66), NULL ),
+      "%u:33; %u:66; ", fieldnum, fieldnum);
 
-  upb_decoder d;
-  upb_decoder_init(&d, handlers);
-  upb_decoder_reset(&d, upb_stdio_allbytes(&in), p);
+  // Non-packed repeated.
+  assert_successful_parse(
+      cat( tag(rep_fieldnum, wire_type), buffer_dup(enc33),
+           tag(rep_fieldnum, wire_type), buffer_dup(enc66), NULL ),
+      "%u:[ %u:33; %u:66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
+
+  // Packed repeated.
+  assert_successful_parse(
+      cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
+           delim(cat( buffer_dup(enc33), buffer_dup(enc66), NULL )), NULL ),
+      "%u:[ %u:33; %u:66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
+
+  buffer_free(enc33);
+  buffer_free(enc66);
+}
+
+void test_valid_data_for_signed_type(upb_fieldtype_t type,
+                                     buffer *enc33, buffer *enc66) {
+  uint32_t fieldnum = type;
+  uint32_t rep_fieldnum = rep_fn(type);
+  int wire_type = upb_types[type].native_wire_type;
+
+  // Non-repeated
+  assert_successful_parse(
+      cat( tag(fieldnum, wire_type), buffer_dup(enc33),
+           tag(fieldnum, wire_type), buffer_dup(enc66), NULL ),
+      "%u:33; %u:-66; ", fieldnum, fieldnum);
+
+  // Non-packed repeated.
+  assert_successful_parse(
+      cat( tag(rep_fieldnum, wire_type), buffer_dup(enc33),
+           tag(rep_fieldnum, wire_type), buffer_dup(enc66), NULL ),
+      "%u:[ %u:33; %u:-66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
+
+  // Packed repeated.
+  assert_successful_parse(
+      cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
+           delim(cat( buffer_dup(enc33), buffer_dup(enc66), NULL )), NULL ),
+      "%u:[ %u:33; %u:-66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
+
+  buffer_free(enc33);
+  buffer_free(enc66);
+}
+
+// Test that invalid protobufs are properly detected (without crashing) and
+// have an error reported.  Field numbers match registered handlers above.
+void test_invalid() {
+  test_premature_eof_for_type(UPB_TYPE(DOUBLE));
+  test_premature_eof_for_type(UPB_TYPE(FLOAT));
+  test_premature_eof_for_type(UPB_TYPE(INT64));
+  test_premature_eof_for_type(UPB_TYPE(UINT64));
+  test_premature_eof_for_type(UPB_TYPE(INT32));
+  test_premature_eof_for_type(UPB_TYPE(FIXED64));
+  test_premature_eof_for_type(UPB_TYPE(FIXED32));
+  test_premature_eof_for_type(UPB_TYPE(BOOL));
+  test_premature_eof_for_type(UPB_TYPE(STRING));
+  test_premature_eof_for_type(UPB_TYPE(BYTES));
+  test_premature_eof_for_type(UPB_TYPE(UINT32));
+  test_premature_eof_for_type(UPB_TYPE(ENUM));
+  test_premature_eof_for_type(UPB_TYPE(SFIXED32));
+  test_premature_eof_for_type(UPB_TYPE(SFIXED64));
+  test_premature_eof_for_type(UPB_TYPE(SINT32));
+  test_premature_eof_for_type(UPB_TYPE(SINT64));
+
+  // EOF inside a tag's varint.
+  assert_does_not_parse_at_eof( buffer_new3("\x80") );
+
+  // EOF inside a known group.
+  assert_does_not_parse_at_eof( tag(4, UPB_WIRE_TYPE_START_GROUP) );
+
+  // EOF inside an unknown group.
+  assert_does_not_parse_at_eof( tag(UNKNOWN_FIELD, UPB_WIRE_TYPE_START_GROUP) );
 
-  upb_status_clear(&status);
-  upb_decoder_decode(&d, &status);
+  // End group that we are not currently in.
+  assert_does_not_parse( tag(4, UPB_WIRE_TYPE_END_GROUP) );
 
-  if (!upb_ok(&status)) {
-    fprintf(stderr, "Error parsing input: %s", upb_status_getstr(&status));
+  // Field number is 0.
+  assert_does_not_parse(
+      cat( tag(0, UPB_WIRE_TYPE_DELIMITED), varint(0), NULL ));
+
+  // Field number is too large.
+  assert_does_not_parse(
+      cat( tag(UPB_MAX_FIELDNUMBER + 1, UPB_WIRE_TYPE_DELIMITED),
+           varint(0), NULL ));
+
+  // Test exceeding the resource limit of stack depth.
+  buffer *buf = buffer_new3("");
+  for (int i = 0; i < UPB_MAX_NESTING; i++) {
+    buf = submsg(UPB_TYPE(MESSAGE), buf);
   }
+  assert_does_not_parse(buf);
 
-  upb_status_uninit(&status);
-  upb_stdio_uninit(&in);
-  upb_stdio_uninit(&out);
-  upb_decoder_uninit(&d);
-  upb_textprinter_free(p);
-  upb_def_unref(UPB_UPCAST(m));
-  upb_symtab_unref(symtab);
-
-  // Prevent C library from holding buffers open, so Valgrind doesn't see
-  // memory leaks.
-  fclose(stdin);
-  fclose(stdout);
+  // Staying within the stack limit should work properly.
+  buf = buffer_new3("");
+  buffer *textbuf = buffer_new3("");
+  int total = UPB_MAX_NESTING - 1;
+  for (int i = 0; i < total; i++) {
+    buf = submsg(UPB_TYPE(MESSAGE), buf);
+    buffer_appendf(textbuf, "%u:{ ", UPB_TYPE(MESSAGE));
+  }
+  for (int i = 0; i < total; i++) {
+    buffer_appendf(textbuf, "} ");
+  }
+  assert_successful_parse(buf, "%s", textbuf->buf);
+  buffer_free(textbuf);
+}
+
+void test_valid() {
+  test_valid_data_for_signed_type(UPB_TYPE(DOUBLE), dbl(33), dbl(-66));
+  test_valid_data_for_signed_type(UPB_TYPE(FLOAT), flt(33), flt(-66));
+  test_valid_data_for_signed_type(UPB_TYPE(INT64), varint(33), varint(-66));
+  test_valid_data_for_signed_type(UPB_TYPE(INT32), varint(33), varint(-66));
+  test_valid_data_for_signed_type(UPB_TYPE(ENUM), varint(33), varint(-66));
+  test_valid_data_for_signed_type(UPB_TYPE(SFIXED32), uint32(33), uint32(-66));
+  test_valid_data_for_signed_type(UPB_TYPE(SFIXED64), uint64(33), uint64(-66));
+  test_valid_data_for_signed_type(UPB_TYPE(SINT32), zz32(33), zz32(-66));
+  test_valid_data_for_signed_type(UPB_TYPE(SINT64), zz64(33), zz64(-66));
+
+  test_valid_data_for_type(UPB_TYPE(UINT64), varint(33), varint(66));
+  test_valid_data_for_type(UPB_TYPE(UINT32), varint(33), varint(66));
+  test_valid_data_for_type(UPB_TYPE(FIXED64), uint64(33), uint64(66));
+  test_valid_data_for_type(UPB_TYPE(FIXED32), uint32(33), uint32(66));
+
+  // Submessage tests.
+  uint32_t msg_fn = UPB_TYPE(MESSAGE);
+  assert_successful_parse(
+      submsg(msg_fn, submsg(msg_fn, submsg(msg_fn, buffer_new3("")))),
+      "%u:{ %u:{ %u:{ } } } ", msg_fn, msg_fn, msg_fn);
+
+  uint32_t repm_fn = rep_fn(UPB_TYPE(MESSAGE));
+  assert_successful_parse(
+      submsg(repm_fn, submsg(repm_fn, buffer_new3(""))),
+      "%u:[ %u:{ %u:[ %u:{ } ] } ] ", repm_fn, repm_fn, repm_fn, repm_fn);
+}
+
+void run_tests() {
+  test_invalid();
+  test_valid();
+}
+
+int main() {
+  // Construct decoder plan.
+  upb_handlers *h = upb_handlers_new();
+  reghandlers(upb_handlers_newmhandlers(h));
+
+  // Test without JIT.
+  plan = upb_decoderplan_new(h, false);
+  run_tests();
+  upb_decoderplan_unref(plan);
+
+  // Test JIT.
+  plan = upb_decoderplan_new(h, true);
+  run_tests();
+  upb_decoderplan_unref(plan);
+
+  plan = NULL;
+  printf("All tests passed, %d assertions.\n", num_assertions);
+  upb_handlers_unref(h);
+  return 0;
 }
diff --git a/tests/test_varint.c b/tests/test_varint.c
index 4c076b3a47..0fc93f02ed 100644
--- a/tests/test_varint.c
+++ b/tests/test_varint.c
@@ -8,12 +8,39 @@
 #include "upb/pb/varint.h"
 #include "upb_test.h"
 
+// Test that we can round-trip from int->varint->int.
+static void test_varint_for_num(upb_decoderet (*decoder)(const char*),
+                                uint64_t num) {
+  char buf[16];
+  memset(buf, 0xff, sizeof(buf));
+  size_t bytes = upb_vencode64(num, buf);
+
+  if (num <= UINT32_MAX) {
+    char buf2[16];
+    memset(buf2, 0, sizeof(buf2));
+    uint64_t encoded = upb_vencode32(num);
+    memcpy(&buf2, &encoded, 8);
+    upb_decoderet r = decoder(buf2);
+    ASSERT(r.val == num);
+    ASSERT(r.p == buf2 + upb_value_size(encoded));
+    ASSERT(upb_zzenc_32(upb_zzdec_32(num)) == num);
+  }
+
+  upb_decoderet r = decoder(buf);
+  ASSERT(r.val == num);
+  ASSERT(r.p == buf + bytes);
+  ASSERT(upb_zzenc_64(upb_zzdec_64(num)) == num);
+}
+
 static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) {
 #define TEST(bytes, expected_val) {\
-    const char buf[] = bytes "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" ; \
+    size_t n = sizeof(bytes) - 1;  /* for NULL */ \
+    char buf[UPB_PB_VARINT_MAX_LEN]; \
+    memset(buf, 0xff, sizeof(buf)); \
+    memcpy(buf, bytes, n); \
     upb_decoderet r = decoder(buf); \
     ASSERT(r.val == expected_val); \
-    ASSERT(r.p == buf + sizeof(buf) - 16);  /* - 1 for NULL */ \
+    ASSERT(r.p == buf + n); \
   }
 
   TEST("\x00",                                                      0ULL);
@@ -30,12 +57,19 @@ static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) {
   TEST("\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07", 0x8303fdf9f1e1c181ULL);
 #undef TEST
 
-  char twelvebyte[16] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01};
+  char twelvebyte[16] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+                         0x80, 0x01, 0x01};
   const char *twelvebyte_buf = twelvebyte;
   // A varint that terminates before hitting the end of the provided buffer,
   // but in too many bytes (11 instead of 10).
   upb_decoderet r = decoder(twelvebyte_buf);
   ASSERT(r.p == NULL);
+
+
+  for (uint64_t num = 5; num <= UINT64_MAX / 3 * 2; num += num / 2) {
+    test_varint_for_num(decoder, num);  // num grows ~1.5x per step.
+  }  // The bound keeps num + num / 2 from wrapping past UINT64_MAX.
+  test_varint_for_num(decoder, 0);
 }
 
 
diff --git a/tests/test_vs_proto2.cc b/tests/test_vs_proto2.cc
index 8d13f33c1f..c43649cf62 100644
--- a/tests/test_vs_proto2.cc
+++ b/tests/test_vs_proto2.cc
@@ -7,15 +7,19 @@
  * given proto type and input protobuf.
  */
 
+#define __STDC_LIMIT_MACROS  // So we get UINT32_MAX
 #include <assert.h>
 #include <inttypes.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <google/protobuf/descriptor.h>
-#include "benchmarks/google_messages.pb.h"
+#include <google/protobuf/wire_format_lite.h>
+#include "upb/benchmarks/google_messages.pb.h"
 #include "upb/def.h"
 #include "upb/msg.h"
 #include "upb/pb/glue.h"
+#include "upb/pb/varint.h"
 #include "upb_test.h"
 
 size_t string_size;
@@ -179,13 +183,13 @@ void compare(const google::protobuf::Message& proto2_msg,
 
 void parse_and_compare(MESSAGE_CIDENT *proto2_msg,
                        void *upb_msg, const upb_msgdef *upb_md,
-                       const char *str, size_t len)
+                       const char *str, size_t len, bool allow_jit)
 {
   // Parse to both proto2 and upb.
   ASSERT(proto2_msg->ParseFromArray(str, len));
   upb_status status = UPB_STATUS_INIT;
   upb_msg_clear(upb_msg, upb_md);
-  upb_strtomsg(str, len, upb_msg, upb_md, &status);
+  upb_strtomsg(str, len, upb_msg, upb_md, allow_jit, &status);
   if (!upb_ok(&status)) {
     fprintf(stderr, "Error parsing protobuf: %s", upb_status_getstr(&status));
     exit(1);
@@ -241,8 +245,10 @@ int main(int argc, char *argv[])
   // Run twice to test proper object reuse.
   MESSAGE_CIDENT proto2_msg;
   void *upb_msg = upb_stdmsg_new(msgdef);
-  parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len);
-  parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len);
+  parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, true);
+  parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, false);
+  parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, true);
+  parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, false);
   printf("All tests passed, %d assertions.\n", num_assertions);
 
   upb_stdmsg_free(upb_msg, msgdef);
@@ -250,6 +256,17 @@ int main(int argc, char *argv[])
   free((void*)str);
   upb_symtab_unref(symtab);
   upb_status_uninit(&status);
+
+  // Test Zig-Zag encoding/decoding.
+  for (uint64_t num = 5; num <= UINT64_MAX / 3 * 2; num += num / 2) {
+    ASSERT(upb_zzenc_64(num) ==
+           google::protobuf::internal::WireFormatLite::ZigZagEncode64(num));
+    if (num < UINT32_MAX) {
+      ASSERT(upb_zzenc_32(num) ==
+             google::protobuf::internal::WireFormatLite::ZigZagEncode32(num));
+    }
+  }  // Integer ~1.5x steps; the bound keeps num + num / 2 from wrapping.
+
   google::protobuf::ShutdownProtobufLibrary();
 
   return 0;
diff --git a/tests/tests.c b/tests/tests.c
index 83fb3ef311..12ff4bb23d 100644
--- a/tests/tests.c
+++ b/tests/tests.c
@@ -39,9 +39,13 @@ static void test_upb_jit() {
   upb_handlers *h = upb_handlers_new();
   upb_handlerset hset = {NULL, NULL, &upb_test_onvalue, NULL, NULL, NULL, NULL};
   upb_handlers_reghandlerset(h, upb_downcast_msgdef_const(def), &hset);
-  upb_decoder d;
-  upb_decoder_init(&d, h);
-  upb_decoder_uninit(&d);
+  upb_decoderplan *p = upb_decoderplan_new(h, true);
+#ifdef UPB_USE_JIT_X64
+  ASSERT(upb_decoderplan_hasjitcode(p));
+#else
+  ASSERT(!upb_decoderplan_hasjitcode(p));
+#endif
+  upb_decoderplan_unref(p);
   upb_symtab_unref(s);
   upb_def_unref(def);
   upb_handlers_unref(h);
diff --git a/upb/bytestream.c b/upb/bytestream.c
index 135f269535..8feb678037 100644
--- a/upb/bytestream.c
+++ b/upb/bytestream.c
@@ -25,7 +25,7 @@ upb_byteregion *upb_byteregion_new(const void *str) {
   return upb_byteregion_newl(str, strlen(str));
 }
 
-upb_byteregion *upb_byteregion_newl(const void *str, uint32_t len) {
+upb_byteregion *upb_byteregion_newl(const void *str, size_t len) {
   upb_stringsrc *src = malloc(sizeof(*src));
   upb_stringsrc_init(src);
   char *ptr = malloc(len + 1);
@@ -37,7 +37,7 @@ upb_byteregion *upb_byteregion_newl(const void *str, uint32_t len) {
 
 void upb_byteregion_free(upb_byteregion *r) {
   if (!r) return;
-  uint32_t len;
+  size_t len;
   free((char*)upb_byteregion_getptr(r, 0, &len));
   upb_stringsrc_uninit((upb_stringsrc*)r->bytesrc);
   free(r->bytesrc);
@@ -64,16 +64,14 @@ void upb_byteregion_reset(upb_byteregion *r, const upb_byteregion *src,
   r->fetch = UPB_MIN(src->fetch, r->end);
 }
 
-bool upb_byteregion_fetch(upb_byteregion *r, upb_status *s) {
+upb_bytesuccess_t upb_byteregion_fetch(upb_byteregion *r) {
   uint64_t fetchable = upb_byteregion_remaining(r, r->fetch);
-  if (fetchable == 0) {
-    upb_status_seteof(s);
-    return false;
-  }
-  uint64_t num = upb_bytesrc_fetch(r->bytesrc, r->fetch, s);
-  if (num == 0) return false;
-  r->fetch += UPB_MIN(num, fetchable);
-  return true;
+  if (fetchable == 0) return UPB_BYTE_EOF;  // Nothing left to fetch here.
+  size_t fetched;
+  upb_bytesuccess_t ret = upb_bytesrc_fetch(r->bytesrc, r->fetch, &fetched);
+  if (ret != UPB_BYTE_OK) return ret;  // Pass EOF/WOULDBLOCK/ERROR through.
+  r->fetch += UPB_MIN(fetched, fetchable);  // Never advance past the region.
+  return UPB_BYTE_OK;
 }
 
 
@@ -93,10 +91,10 @@ static upb_stdio_buf *upb_stdio_findbuf(const upb_stdio *s, uint64_t ofs) {
 
 static upb_stdio_buf *upb_stdio_rotatebufs(upb_stdio *s) {
   upb_stdio_buf **reuse = NULL;  // XXX
-  uint32_t num_reused = 0, num_inuse = 0;
+  int num_reused = 0, num_inuse = 0;
 
   // Could sweep only a subset of bufs if this was a hotspot.
-  for (uint32_t i = 0; i < s->nbuf; i++) {
+  for (int i = 0; i < s->nbuf; i++) {
     upb_stdio_buf *buf = s->bufs[i];
     if (buf->refcount > 0) {
       s->bufs[num_inuse++] = buf;
@@ -120,28 +118,37 @@ void upb_stdio_discard(void *src, uint64_t ofs) {
   (void)ofs;
 }
 
-uint32_t upb_stdio_fetch(void *src, uint64_t ofs, upb_status *s) {
+upb_bytesuccess_t upb_stdio_fetch(void *src, uint64_t ofs, size_t *bytes_read) {
   (void)ofs;
   upb_stdio *stdio = (upb_stdio*)src;
   upb_stdio_buf *buf = upb_stdio_rotatebufs(stdio);
-  uint32_t read = fread(&buf->data, 1, BUF_SIZE, stdio->file);
-  buf->len = read;
-  if(read < (uint32_t)BUF_SIZE) {
+retry:
+  *bytes_read = fread(&buf->data, 1, BUF_SIZE, stdio->file);
+  buf->len = *bytes_read;
+  if (*bytes_read < (size_t)BUF_SIZE) {
     // Error or EOF.
-    if(feof(stdio->file)) {
-      upb_status_seteof(s);
-      return read;
+    if (feof(stdio->file)) {
+      upb_status_seteof(&stdio->src.status);
+      return UPB_BYTE_EOF;  // NOTE(review): also hit on a short final read.
     }
-    if(ferror(stdio->file)) {
-      upb_status_fromerrno(s);
-      return 0;
+    if (ferror(stdio->file)) {
+#ifdef EINTR
+      // If we encounter a client who doesn't want to retry EINTR, we can easily
+      // add a boolean property of the stdio that controls this behavior.
+      if (errno == EINTR) {
+        clearerr(stdio->file);
+        goto retry;  // NOTE(review): re-reads into the start of the same buf.
+      }
+#endif
+      upb_status_fromerrno(&stdio->src.status);
+      return upb_errno_is_wouldblock() ? UPB_BYTE_WOULDBLOCK : UPB_BYTE_ERROR;
     }
     assert(false);
   }
-  return buf->ofs + buf->len;
+  return UPB_BYTE_OK;
 }
 
-void upb_stdio_read(const void *src, uint64_t ofs, uint32_t len, char *dst) {
+void upb_stdio_copy(const void *src, uint64_t ofs, size_t len, char *dst) {
   upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs);
   ofs -= buf->ofs;
   memcpy(dst, buf->data + ofs, BUF_SIZE - ofs);
@@ -149,14 +156,14 @@ void upb_stdio_read(const void *src, uint64_t ofs, uint32_t len, char *dst) {
   dst += (BUF_SIZE - ofs);
   while (len > 0) {
     ++buf;
-    uint32_t bytes = UPB_MIN(len, BUF_SIZE);
+    size_t bytes = UPB_MIN(len, BUF_SIZE);
     memcpy(dst, buf->data, bytes);
     len -= bytes;
     dst += bytes;
   }
 }
 
-const char *upb_stdio_getptr(const void *src, uint64_t ofs, uint32_t *len) {
+const char *upb_stdio_getptr(const void *src, uint64_t ofs, size_t *len) {
   upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs);
   ofs -= buf->ofs;
   *len = BUF_SIZE - ofs;
@@ -168,7 +175,7 @@ upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *s
   upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink));
   upb_strlen_t len = upb_string_len(str);
   upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file);
-  if(written < len) {
+  if (written < len) {
     upb_status_setf(status, UPB_ERROR, "Error writing to stdio stream.");
     return -1;
   }
@@ -191,7 +198,7 @@ void upb_stdio_init(upb_stdio *stdio) {
   static upb_bytesrc_vtbl bytesrc_vtbl = {
     &upb_stdio_fetch,
     &upb_stdio_discard,
-    &upb_stdio_read,
+    &upb_stdio_copy,
     &upb_stdio_getptr,
   };
   upb_bytesrc_init(&stdio->src, &bytesrc_vtbl);
@@ -226,20 +233,25 @@ void upb_stdio_uninit(upb_stdio *stdio) {
   stdio->file = NULL;
 }
 
-upb_byteregion* upb_stdio_allbytes(upb_stdio *stdio) { return &stdio->byteregion; }
+upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->src; }
 upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; }
 
 
 /* upb_stringsrc **************************************************************/
 
-uint32_t upb_stringsrc_fetch(void *_src, uint64_t ofs, upb_status *s) {
+upb_bytesuccess_t upb_stringsrc_fetch(void *_src, uint64_t ofs, size_t *read) {
   upb_stringsrc *src = _src;
-  upb_status_seteof(s);
-  return src->len - ofs;
+  assert(ofs <= src->len);  // ofs == len is legal and simply reports EOF.
+  if (ofs == src->len) {
+    upb_status_seteof(&src->bytesrc.status);
+    return UPB_BYTE_EOF;
+  }
+  *read = src->len - ofs;  // All remaining bytes of the string.
+  return UPB_BYTE_OK;
 }
 
-void upb_stringsrc_read(const void *_src, uint64_t ofs,
-                        uint32_t len, char *dst) {
+void upb_stringsrc_copy(const void *_src, uint64_t ofs,
+                        size_t len, char *dst) {
   const upb_stringsrc *src = _src;
   assert(ofs + len <= src->len);
   memcpy(dst, src->str + ofs, len);
@@ -250,7 +262,7 @@ void upb_stringsrc_discard(void *src, uint64_t ofs) {
   (void)ofs;
 }
 
-const char *upb_stringsrc_getptr(const void *_s, uint64_t ofs, uint32_t *len) {
+const char *upb_stringsrc_getptr(const void *_s, uint64_t ofs, size_t *len) {
   const upb_stringsrc *src = _s;
   *len = src->len - ofs;
   return src->str + ofs;
@@ -260,7 +272,7 @@ void upb_stringsrc_init(upb_stringsrc *s) {
   static upb_bytesrc_vtbl vtbl = {
     &upb_stringsrc_fetch,
     &upb_stringsrc_discard,
-    &upb_stringsrc_read,
+    &upb_stringsrc_copy,
     &upb_stringsrc_getptr,
   };
   upb_bytesrc_init(&s->bytesrc, &vtbl);
@@ -269,7 +281,7 @@ void upb_stringsrc_init(upb_stringsrc *s) {
   s->byteregion.toplevel = true;
 }
 
-void upb_stringsrc_reset(upb_stringsrc *s, const char *str, uint32_t len) {
+void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len) {
   s->str = str;
   s->len = len;
   s->byteregion.start = 0;
@@ -280,18 +292,13 @@ void upb_stringsrc_reset(upb_stringsrc *s, const char *str, uint32_t len) {
 
 void upb_stringsrc_uninit(upb_stringsrc *s) { (void)s; }
 
-upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) {
-  return &s->bytesrc;
-}
-
-
 /* upb_stringsink *************************************************************/
 
 void upb_stringsink_uninit(upb_stringsink *s) {
   free(s->str);
 }
 
-void upb_stringsink_reset(upb_stringsink *s, char *str, uint32_t size) {
+void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size) {
   free(s->str);
   s->str = str;
   s->len = 0;
diff --git a/upb/bytestream.h b/upb/bytestream.h
index 3b339f17d7..409ae80f1c 100644
--- a/upb/bytestream.h
+++ b/upb/bytestream.h
@@ -63,11 +63,17 @@
 // +------------------------
 // | nondelimited region   Z   <-- won't return EOF until data source hits EOF.
 // +------------------------
+//
+// TODO: if 64-bit math for stream offsets is a performance issue on
+// non-64-bit machines, we could introduce a upb_off_t typedef that can be
+// defined as a 32-bit type for applications that don't need to handle
+// streams longer than 4GB.
 
 
 #ifndef UPB_BYTESTREAM_H
 #define UPB_BYTESTREAM_H
 
+#include <errno.h>
 #include <stdarg.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -79,6 +85,12 @@
 extern "C" {
 #endif
 
+typedef enum {
+  UPB_BYTE_OK = UPB_OK,
+  UPB_BYTE_WOULDBLOCK = UPB_SUSPENDED,
+  UPB_BYTE_ERROR = UPB_ERROR,
+  UPB_BYTE_EOF
+} upb_bytesuccess_t;
 
 /* upb_bytesrc ****************************************************************/
 
@@ -90,10 +102,10 @@ extern "C" {
 // upb_bytesrc is a virtual base class with implementations that get data from
 // eg. a string, a cord, a file descriptor, a FILE*, etc.
 
-typedef uint32_t upb_bytesrc_fetch_func(void*, uint64_t, upb_status*);
+typedef upb_bytesuccess_t upb_bytesrc_fetch_func(void*, uint64_t, size_t*);
 typedef void upb_bytesrc_discard_func(void*, uint64_t);
-typedef void upb_bytesrc_copy_func(const void*, uint64_t, uint32_t, char*);
-typedef const char *upb_bytesrc_getptr_func(const void*, uint64_t, uint32_t*);
+typedef void upb_bytesrc_copy_func(const void*, uint64_t, size_t, char*);
+typedef const char *upb_bytesrc_getptr_func(const void*, uint64_t, size_t*);
 typedef struct _upb_bytesrc_vtbl {
   upb_bytesrc_fetch_func     *fetch;
   upb_bytesrc_discard_func   *discard;
@@ -102,21 +114,27 @@ typedef struct _upb_bytesrc_vtbl {
 } upb_bytesrc_vtbl;
 
 typedef struct {
-  upb_bytesrc_vtbl  *vtbl;
+  const upb_bytesrc_vtbl  *vtbl;
+  upb_status status;
 } upb_bytesrc;
 
-INLINE void upb_bytesrc_init(upb_bytesrc *src, upb_bytesrc_vtbl *vtbl) {
+INLINE void upb_bytesrc_init(upb_bytesrc *src, const upb_bytesrc_vtbl *vtbl) {
   src->vtbl = vtbl;
+  upb_status_init(&src->status);
+}
+
+INLINE void upb_bytesrc_uninit(upb_bytesrc *src) {
+  upb_status_uninit(&src->status);
 }
 
-// Fetches at least one byte starting at ofs, returning the actual number of
-// bytes fetched (or 0 on EOF or error: see *s for details).  Some bytesrc's
-// may set EOF on *s after a successful read if no further data is available,
-// but not all bytesrc's support this.  It is valid for bytes to be fetched
-// multiple times, as long as the bytes have not been previously discarded.
-INLINE uint32_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs,
-                                  upb_status *s) {
-  return src->vtbl->fetch(src, ofs, s);
+// Fetches at least one byte starting at ofs, returning the success or failure
+// of the operation.  If UPB_BYTE_OK is returned, *read indicates the number
+// of bytes successfully fetched; any error or EOF status will be reflected in
+// upb_bytesrc_status().  It is valid for bytes to be fetched multiple times,
+// as long as the bytes have not been previously discarded.
+INLINE upb_bytesuccess_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs,
+                                           size_t *read) {
+  return src->vtbl->fetch(src, ofs, read);
 }
 
 // Discards all data prior to ofs (except data that is pinned, if pinning
@@ -127,7 +145,7 @@ INLINE void upb_bytesrc_discard(upb_bytesrc *src, uint64_t ofs) {
 
 // Copies "len" bytes of data from ofs to "dst", which must be at least "len"
 // bytes long.  The given region must not be discarded.
-INLINE void upb_bytesrc_copy(const upb_bytesrc *src, uint64_t ofs, uint32_t len,
+INLINE void upb_bytesrc_copy(const upb_bytesrc *src, uint64_t ofs, size_t len,
                              char *dst) {
   src->vtbl->copy(src, ofs, len, dst);
 }
@@ -138,7 +156,7 @@ INLINE void upb_bytesrc_copy(const upb_bytesrc *src, uint64_t ofs, uint32_t len,
 // part of the returned buffer is discarded, only the non-discarded bytes
 // remain valid).
 INLINE const char *upb_bytesrc_getptr(const upb_bytesrc *src, uint64_t ofs,
-                                      uint32_t *len) {
+                                      size_t *len) {
   return src->vtbl->getptr(src, ofs, len);
 }
 
@@ -148,14 +166,14 @@ INLINE const char *upb_bytesrc_getptr(const upb_bytesrc *src, uint64_t ofs,
 // // is guaranteed that the region will not be discarded (nor will the bytesrc
 // // be destroyed) until the region is unpinned.  However, not all bytesrc's
 // // support pinning; a false return indicates that a pin was not possible.
-// INLINE bool upb_bytesrc_pin(upb_bytesrc *src, uint64_t ofs, uint32_t len) {
+// INLINE bool upb_bytesrc_pin(upb_bytesrc *src, uint64_t ofs, size_t len) {
 //   return src->vtbl->refregion(src, ofs, len);
 // }
 //
 // // Releases some number of pinned bytes from the beginning of a pinned
 // // region (which may be fewer than the total number of bytes pinned).
-// INLINE void upb_bytesrc_unpin(upb_bytesrc *src, uint64_t ofs, uint32_t len,
-//                               uint32_t bytes_to_release) {
+// INLINE void upb_bytesrc_unpin(upb_bytesrc *src, uint64_t ofs, size_t len,
+//                               size_t bytes_to_release) {
 //   src->vtbl->unpin(src, ofs, len);
 // }
 //
@@ -173,7 +191,7 @@ typedef struct _upb_byteregion {
   uint64_t fetch;
   uint64_t end;         // UPB_NONDELIMITED if nondelimited.
   upb_bytesrc *bytesrc;
-  bool toplevel;        // If true, discards hit the underlying byteregion.
+  bool toplevel;        // If true, discards hit the underlying bytesrc.
 } upb_byteregion;
 
 // Initializes a byteregion.  Its initial value will be empty.  No methods may
@@ -225,14 +243,17 @@ void upb_byteregion_release(upb_byteregion *r);
 // Attempts to fetch more data, extending the fetched range of this byteregion.
 // Returns true if the fetched region was extended by at least one byte, false
 // on EOF or error (see *s for details).
-bool upb_byteregion_fetch(upb_byteregion *r, upb_status *s);
+upb_bytesuccess_t upb_byteregion_fetch(upb_byteregion *r);
 
-// Fetches all remaining data for "r", returning false if the operation failed
-// (see "*s" for details).  May only be used on delimited byteregions.
-INLINE bool upb_byteregion_fetchall(upb_byteregion *r, upb_status *s) {
+// Fetches all remaining data for "r", returning the success of the operation.
+// May only be used on delimited byteregions.
+INLINE upb_bytesuccess_t upb_byteregion_fetchall(upb_byteregion *r) {
   assert(upb_byteregion_len(r) != UPB_NONDELIMITED);
-  while (upb_byteregion_fetch(r, s)) ;  // Empty body.
-  return upb_eof(s);
+  upb_bytesuccess_t ret;
+  do {
+    ret = upb_byteregion_fetch(r);
+  } while (ret == UPB_BYTE_OK);
+  return ret == UPB_BYTE_EOF ? UPB_BYTE_OK : ret;
 }
 
 // Discards bytes from the byteregion up until ofs (which must be greater or
@@ -243,13 +264,14 @@ INLINE void upb_byteregion_discard(upb_byteregion *r, uint64_t ofs) {
   assert(ofs >= upb_byteregion_discardofs(r));
   assert(ofs <= upb_byteregion_endofs(r));
   r->discard = ofs;
+  if (ofs > r->fetch) r->fetch = ofs;
   if (r->toplevel) upb_bytesrc_discard(r->bytesrc, ofs);
 }
 
 // Copies "len" bytes of data into "dst", starting at ofs.  The specified
 // region must be available.
 INLINE void upb_byteregion_copy(const upb_byteregion *r, uint64_t ofs,
-                                uint32_t len, char *dst) {
+                                size_t len, char *dst) {
   assert(ofs >= upb_byteregion_discardofs(r));
   assert(len <= upb_byteregion_available(r, ofs));
   upb_bytesrc_copy(r->bytesrc, ofs, len, dst);
@@ -268,7 +290,7 @@ INLINE void upb_byteregion_copyall(const upb_byteregion *r, char *dst) {
 // or when the bytes are discarded.  If the byteregion is not currently pinned,
 // the pointer is only valid for the lifetime of the parent byteregion.
 INLINE const char *upb_byteregion_getptr(const upb_byteregion *r,
-                                         uint64_t ofs, uint32_t *len) {
+                                         uint64_t ofs, size_t *len) {
   assert(ofs >= upb_byteregion_discardofs(r));
   const char *ret = upb_bytesrc_getptr(r->bytesrc, ofs, len);
   *len = UPB_MIN(*len, upb_byteregion_available(r, ofs));
@@ -295,7 +317,7 @@ INLINE const char *upb_byteregion_getptr(const upb_byteregion *r,
 // The string data in the returned region is guaranteed to be contiguous and
 // NULL-terminated.
 upb_byteregion *upb_byteregion_new(const void *str);
-upb_byteregion *upb_byteregion_newl(const void *str, uint32_t len);
+upb_byteregion *upb_byteregion_newl(const void *str, size_t len);
 // May *only* be called on a byteregion created with upb_byteregion_new[l]()!
 void upb_byteregion_free(upb_byteregion *r);
 
@@ -399,7 +421,7 @@ INLINE void upb_bytesink_rewind(upb_bytesink *sink, uint64_t offset) {
 
 typedef struct {
   uint64_t ofs;
-  uint32_t len;
+  size_t len;
   uint32_t refcount;
   char data[];
 } upb_stdio_buf;
@@ -414,7 +436,6 @@ typedef struct {
   bool should_close;
   upb_stdio_buf **bufs;
   uint32_t nbuf, szbuf;
-  upb_byteregion byteregion;
 } upb_stdio;
 
 void upb_stdio_init(upb_stdio *stdio);
@@ -433,7 +454,7 @@ void upb_stdio_reset(upb_stdio *stdio, FILE *file);
 void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode,
                     upb_status *s);
 
-upb_byteregion *upb_stdio_allbytes(upb_stdio *stdio);
+upb_bytesrc *upb_stdio_bytesrc(upb_stdio *stdio);
 upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio);
 
 
@@ -444,7 +465,7 @@ upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio);
 typedef struct {
   upb_bytesrc bytesrc;
   const char *str;
-  uint32_t len;
+  size_t len;
   upb_byteregion byteregion;
 } upb_stringsrc;
 
@@ -454,7 +475,11 @@ void upb_stringsrc_uninit(upb_stringsrc *s);
 
 // Resets the stringsrc to a state where it will vend the given string.  The
 // string data must be valid until the stringsrc is reset again or destroyed.
-void upb_stringsrc_reset(upb_stringsrc *s, const char *str, uint32_t len);
+void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len);
+
+INLINE upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) {
+  return &s->bytesrc;
+}
 
 // Returns the top-level upb_byteregion* for this stringsrc.  Invalidated when
 // the stringsrc is reset.
@@ -468,7 +493,7 @@ INLINE upb_byteregion *upb_stringsrc_allbytes(upb_stringsrc *s) {
 struct _upb_stringsink {
   upb_bytesink bytesink;
   char *str;
-  uint32_t len, size;
+  size_t len, size;
 };
 typedef struct _upb_stringsink upb_stringsink;
 
@@ -478,12 +503,12 @@ void upb_stringsink_uninit(upb_stringsink *s);
 
 // Resets the sink's string to "str", which the sink takes ownership of.
 // "str" may be NULL, which will make the sink allocate a new string.
-void upb_stringsink_reset(upb_stringsink *s, char *str, uint32_t len);
+void upb_stringsink_reset(upb_stringsink *s, char *str, size_t len);
 
 // Releases ownership of the returned string (which is "len" bytes long) and
 // resets the internal string to be empty again (as if reset were called with
 // NULL).
-const char *upb_stringsink_release(upb_stringsink *s, uint32_t *len);
+const char *upb_stringsink_release(upb_stringsink *s, size_t *len);
 
 // Returns the upb_bytesink* for this stringsrc.  Invalidated by reset above.
 upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s);
diff --git a/upb/def.c b/upb/def.c
index 13418c6fdd..246e9bb7a0 100644
--- a/upb/def.c
+++ b/upb/def.c
@@ -334,7 +334,7 @@ static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) {
     if (upb_byteregion_len(bytes) == 0) {
       upb_value_setint32(&f->defaultval, e->defaultval);
     } else {
-      uint32_t len;
+      size_t len;
       // ptr is guaranteed to be NULL-terminated because the byteregion was
       // created with upb_byteregion_newl().
       const char *ptr = upb_byteregion_getptr(bytes, 0, &len);
diff --git a/upb/handlers.c b/upb/handlers.c
index 0af09ef43c..d1b68ad81e 100644
--- a/upb/handlers.c
+++ b/upb/handlers.c
@@ -13,7 +13,7 @@
 
 static upb_mhandlers *upb_mhandlers_new() {
   upb_mhandlers *m = malloc(sizeof(*m));
-  upb_inttable_init(&m->fieldtab, 8, sizeof(upb_fhandlers));
+  upb_inttable_init(&m->fieldtab, 8, sizeof(upb_itofhandlers_ent));
   m->startmsg = NULL;
   m->endmsg = NULL;
   m->is_group = false;
@@ -26,21 +26,21 @@ static upb_mhandlers *upb_mhandlers_new() {
 static upb_fhandlers *_upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
                                                   upb_fieldtype_t type,
                                                   bool repeated) {
-  uint32_t tag = n << 3 | upb_types[type].native_wire_type;
-  upb_fhandlers *f = upb_inttable_lookup(&m->fieldtab, tag);
-  if (f) abort();
-  upb_fhandlers new_f = {false, type, repeated,
-      repeated && upb_isprimitivetype(type), UPB_ATOMIC_INIT(0),
+  upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, n);
+  // TODO: design/refine the API for changing the set of fields or modifying
+  // existing handlers.
+  if (e) return NULL;
+  upb_fhandlers new_f = {type, repeated, UPB_ATOMIC_INIT(0),
       n, -1, m, NULL, UPB_NO_VALUE, NULL, NULL, NULL, NULL, NULL,
 #ifdef UPB_USE_JIT_X64
       0, 0, 0,
 #endif
       NULL};
-  upb_inttable_insert(&m->fieldtab, tag, &new_f);
-  f = upb_inttable_lookup(&m->fieldtab, tag);
-  assert(f);
-  assert(f->type == type);
-  return f;
+  upb_fhandlers *ptr = malloc(sizeof(*ptr));
+  memcpy(ptr, &new_f, sizeof(upb_fhandlers));
+  upb_itofhandlers_ent ent = {false, ptr};
+  upb_inttable_insert(&m->fieldtab, n, &ent);
+  return ptr;
 }
 
 upb_fhandlers *upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
@@ -57,6 +57,7 @@ upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n,
   assert(type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP));
   assert(subm);
   upb_fhandlers *f = _upb_mhandlers_newfhandlers(m, n, type, repeated);
+  if (!f) return NULL;
   f->submsg = subm;
   if (type == UPB_TYPE(GROUP))
     _upb_mhandlers_newfhandlers(subm, n, UPB_TYPE_ENDGROUP, false);
@@ -82,6 +83,12 @@ void upb_handlers_unref(upb_handlers *h) {
   if (upb_atomic_unref(&h->refcount)) {
     for (int i = 0; i < h->msgs_len; i++) {
       upb_mhandlers *mh = h->msgs[i];
+      for(upb_inttable_iter j = upb_inttable_begin(&mh->fieldtab);
+          !upb_inttable_done(j);
+          j = upb_inttable_next(&mh->fieldtab, j)) {
+        upb_itofhandlers_ent *e = upb_inttable_iter_value(j);
+        free(e->f);
+      }
       upb_inttable_free(&mh->fieldtab);
 #ifdef UPB_USE_JIT_X64
       free(mh->tablearray);
@@ -154,41 +161,24 @@ upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, const upb_msgdef *m,
 
 /* upb_dispatcher *************************************************************/
 
-static upb_fhandlers toplevel_f = {
-  false, UPB_TYPE(GROUP), false, false, UPB_ATOMIC_INIT(0), 0,
-  -1, NULL, NULL, // submsg
-#ifdef NDEBUG
-  {{0}},
-#else
-  {{0}, -1},
-#endif
-  NULL, NULL, NULL, NULL, NULL,
-#ifdef UPB_USE_JIT_X64
-  0, 0, 0,
-#endif
-  NULL};
-
-void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h,
-                         upb_skip_handler *skip, upb_exit_handler *exit,
+void upb_dispatcher_init(upb_dispatcher *d, upb_status *status,
+                         upb_exit_handler UPB_NORETURN *exit,
                          void *srcclosure) {
-  d->handlers = h;
-  upb_handlers_ref(h);
-  for (int i = 0; i < h->msgs_len; i++) {
-    upb_mhandlers *m = h->msgs[i];
-    upb_inttable_compact(&m->fieldtab);
-  }
-  d->stack[0].f = &toplevel_f;
+  d->stack[0].f = NULL;  // Should never be read.
   d->limit = &d->stack[UPB_MAX_NESTING];
-  d->skip = skip;
-  d->exit = exit;
+  d->exitjmp = exit;
   d->srcclosure = srcclosure;
   d->top_is_implicit = false;
-  upb_status_init(&d->status);
+  d->msgent = NULL;
+  d->top = NULL;
+  d->toplevel_msgent = NULL;
+  d->status = status;
 }
 
-upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *closure) {
-  d->msgent = d->handlers->msgs[0];
-  d->dispatch_table = &d->msgent->fieldtab;
+upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *closure,
+                                           upb_mhandlers *top) {
+  d->msgent = top;
+  d->toplevel_msgent = top;
   d->top = d->stack;
   d->top->closure = closure;
   d->top->is_sequence = false;
@@ -197,46 +187,32 @@ upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *closure) {
 }
 
 void upb_dispatcher_uninit(upb_dispatcher *d) {
-  upb_handlers_unref(d->handlers);
-  upb_status_uninit(&d->status);
 }
 
 void upb_dispatch_startmsg(upb_dispatcher *d) {
   upb_flow_t flow = UPB_CONTINUE;
   if (d->msgent->startmsg) d->msgent->startmsg(d->top->closure);
-  if (flow != UPB_CONTINUE) _upb_dispatcher_unwind(d, flow);
+  if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d);
 }
 
 void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status) {
   assert(d->top == d->stack);
-  if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, &d->status);
+  if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, d->status);
   // TODO: should we avoid this copy by passing client's status obj to cbs?
-  upb_status_copy(status, &d->status);
-}
-
-void indent(upb_dispatcher *d) {
-  for (int i = 0; i < (d->top - d->stack); i++) fprintf(stderr, " ");
-}
-
-void indentm1(upb_dispatcher *d) {
-  for (int i = 0; i < (d->top - d->stack - 1); i++) fprintf(stderr, " ");
+  upb_status_copy(status, d->status);
 }
 
 upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
                                             upb_fhandlers *f) {
-  //indent(d);
-  //fprintf(stderr, "START SEQ: %d\n", f->number);
-  if((d->top+1) >= d->limit) {
-    upb_status_seterrliteral(&d->status, "Nesting too deep.");
-    _upb_dispatcher_unwind(d, UPB_BREAK);
-    return d->top;  // Dummy.
+  if (d->top + 1 >= d->limit) {
+    upb_status_seterrliteral(d->status, "Nesting too deep.");
+    _upb_dispatcher_abortjmp(d);
   }
 
   upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure);
   if (f->startseq) sflow = f->startseq(d->top->closure, f->fval);
   if (sflow.flow != UPB_CONTINUE) {
-    _upb_dispatcher_unwind(d, sflow.flow);
-    return d->top;  // Dummy.
+    _upb_dispatcher_abortjmp(d);
   }
 
   ++d->top;
@@ -248,8 +224,6 @@ upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
 }
 
 upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) {
-  //indentm1(d);
-  //fprintf(stderr, "END SEQ\n");
   assert(d->top > d->stack);
   assert(d->top->is_sequence);
   upb_fhandlers *f = d->top->f;
@@ -257,30 +231,23 @@ upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) {
   upb_flow_t flow = UPB_CONTINUE;
   if (f->endseq) flow = f->endseq(d->top->closure, f->fval);
   if (flow != UPB_CONTINUE) {
-    printf("YO, UNWINDING!\n");
-    _upb_dispatcher_unwind(d, flow);
-    return d->top;  // Dummy.
+    _upb_dispatcher_abortjmp(d);
   }
-  d->msgent = d->top->f->submsg ? d->top->f->submsg : d->handlers->msgs[0];
-  d->dispatch_table = &d->msgent->fieldtab;
+  d->msgent = d->top->f ? d->top->f->submsg : d->toplevel_msgent;
   return d->top;
 }
 
 upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
                                                upb_fhandlers *f) {
-  //indent(d);
-  //fprintf(stderr, "START SUBMSG: %d\n", f->number);
-  if((d->top+1) >= d->limit) {
-    upb_status_seterrliteral(&d->status, "Nesting too deep.");
-    _upb_dispatcher_unwind(d, UPB_BREAK);
-    return d->top;  // Dummy.
+  if (d->top + 1 >= d->limit) {
+    upb_status_seterrliteral(d->status, "Nesting too deep.");
+    _upb_dispatcher_abortjmp(d);
   }
 
   upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure);
   if (f->startsubmsg) sflow = f->startsubmsg(d->top->closure, f->fval);
   if (sflow.flow != UPB_CONTINUE) {
-    _upb_dispatcher_unwind(d, sflow.flow);
-    return d->top;  // Dummy.
+    _upb_dispatcher_abortjmp(d);
   }
 
   ++d->top;
@@ -289,24 +256,20 @@ upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
   d->top->is_packed = false;
   d->top->closure = sflow.closure;
   d->msgent = f->submsg;
-  d->dispatch_table = &d->msgent->fieldtab;
   upb_dispatch_startmsg(d);
   return d->top;
 }
 
 upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d) {
-  //indentm1(d);
-  //fprintf(stderr, "END SUBMSG\n");
   assert(d->top > d->stack);
   assert(!d->top->is_sequence);
   upb_fhandlers *f = d->top->f;
-  if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, &d->status);
+  if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, d->status);
   d->msgent = d->top->f->msg;
-  d->dispatch_table = &d->msgent->fieldtab;
   --d->top;
   upb_flow_t flow = UPB_CONTINUE;
   if (f->endsubmsg) f->endsubmsg(d->top->closure, f->fval);
-  if (flow != UPB_CONTINUE) _upb_dispatcher_unwind(d, flow);
+  if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d);
   return d->top;
 }
 
@@ -320,14 +283,7 @@ bool upb_dispatcher_islegalend(upb_dispatcher *d) {
   return false;
 }
 
-void _upb_dispatcher_unwind(upb_dispatcher *d, upb_flow_t flow) {
-  upb_dispatcher_frame *frame = d->top;
-  while (1) {
-    frame->f->submsg->endmsg(frame->closure, &d->status);
-    frame->f->endsubmsg(frame->closure, frame->f->fval);
-    --frame;
-    if (frame < d->stack) { d->exit(d->srcclosure); return; }
-    d->top = frame;
-    if (flow == UPB_SKIPSUBMSG) return;
-  }
+void _upb_dispatcher_abortjmp(upb_dispatcher *d) {
+  d->exitjmp(d->srcclosure);
+  assert(false);  // Never returns.
 }
diff --git a/upb/handlers.h b/upb/handlers.h
index e17a72694f..9ed02c114f 100644
--- a/upb/handlers.h
+++ b/upb/handlers.h
@@ -132,13 +132,15 @@ typedef upb_flow_t (upb_endfield_handler)(void *closure, upb_value fval);
 
 // A upb_fhandlers object represents the set of handlers associated with one
 // specific message field.
+//
+// TODO: remove upb_decoder-specific fields from this, and instead have
+// upb_decoderplan make a deep copy of the whole graph with its own fields
+// added.
 struct _upb_decoder;
 struct _upb_mhandlers;
 typedef struct _upb_fieldent {
-  bool junk;
   upb_fieldtype_t type;
   bool repeated;
-  bool is_repeated_primitive;
   upb_atomic_t refcount;
   uint32_t number;
   int32_t valuehasbit;
@@ -158,6 +160,11 @@ typedef struct _upb_fieldent {
   void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f);
 } upb_fhandlers;
 
+typedef struct {
+  bool junk;  // Stolen by table impl; see table.h for details.
+  upb_fhandlers *f;
+} upb_itofhandlers_ent;
+
 // fhandlers are created as part of a upb_handlers instance, but can be ref'd
 // and unref'd to prolong the life of the handlers.
 void upb_fhandlers_ref(upb_fhandlers *m);
@@ -194,16 +201,18 @@ typedef struct _upb_mhandlers {
   upb_inttable fieldtab;  // Maps field number -> upb_fhandlers.
   bool is_group;
 #ifdef UPB_USE_JIT_X64
-  uint32_t jit_startmsg_pclabel;
-  uint32_t jit_endofbuf_pclabel;
-  uint32_t jit_endofmsg_pclabel;
-  uint32_t jit_dyndispatch_pclabel;
-  uint32_t jit_unknownfield_pclabel;
-  int32_t jit_parent_field_done_pclabel;
+  // Used inside the JIT to track labels (jmp targets) in the generated code.
+  uint32_t jit_startmsg_pclabel;  // Starting a parse of this (sub-)message.
+  uint32_t jit_endofbuf_pclabel;  // ptr hitend, but delim_end or jit_end?
+  uint32_t jit_endofmsg_pclabel;  // Done parsing this (sub-)message.
+  uint32_t jit_dyndispatch_pclabel;  // Dispatch by table lookup.
+  uint32_t jit_unknownfield_pclabel;  // Parsed an unknown field.
   uint32_t max_field_number;
   // Currently keyed on field number.  Could also try keying it
   // on encoded or decoded tag, or on encoded field number.
   void **tablearray;
+  // Pointer to the JIT code for parsing this message.
+  void *jit_func;
 #endif
 } upb_mhandlers;
 
@@ -316,62 +325,47 @@ INLINE upb_mhandlers *upb_handlers_reghandlerset(upb_handlers *h, const upb_msgd
 typedef struct {
   upb_fhandlers *f;
   void *closure;
-
-  // Members to use as the data source requires.
-  void *srcclosure;
   uint64_t end_ofs;
-  uint16_t msgindex;
-  uint16_t fieldindex;
-
   bool is_sequence;   // frame represents seq or submsg? (f might be both).
   bool is_packed;     // !upb_issubmsg(f) && end_ofs != UINT64_MAX
                       // (strings aren't pushed).
 } upb_dispatcher_frame;
 
-// Called when some of the input needs to be skipped.  All frames from the
-// current top to "bottom", inclusive, should be skipped.
-typedef void upb_skip_handler(void *, upb_dispatcher_frame *bottom);
 typedef void upb_exit_handler(void *);
 
 typedef struct {
   upb_dispatcher_frame *top, *limit;
 
-  upb_handlers *handlers;
-
   // Msg and dispatch table for the current level.
   upb_mhandlers *msgent;
-  upb_inttable *dispatch_table;
-  upb_skip_handler *skip;
-  upb_exit_handler *exit;
+  upb_mhandlers *toplevel_msgent;
+  upb_exit_handler UPB_NORETURN *exitjmp;
   void *srcclosure;
   bool top_is_implicit;
 
   // Stack.
-  upb_status status;
+  upb_status *status;
   upb_dispatcher_frame stack[UPB_MAX_NESTING];
 } upb_dispatcher;
 
-void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h,
-                         upb_skip_handler *skip, upb_exit_handler *exit,
-                         void *closure);
-upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *topclosure);
+// Caller retains ownership of the status object.
+void upb_dispatcher_init(upb_dispatcher *d, upb_status *status,
+                         upb_exit_handler UPB_NORETURN *exit, void *closure);
+upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *topclosure,
+                                           upb_mhandlers *top_msg);
 void upb_dispatcher_uninit(upb_dispatcher *d);
 
 // Tests whether the message could legally end here (either the stack is empty
 // or the only open stack frame is implicit).
 bool upb_dispatcher_islegalend(upb_dispatcher *d);
 
-// Looks up a field by number for the current message.
-INLINE upb_fhandlers *upb_dispatcher_lookup(upb_dispatcher *d, uint32_t n) {
-  return (upb_fhandlers*)upb_inttable_fastlookup(
-      d->dispatch_table, n, sizeof(upb_fhandlers));
-}
-
-void _upb_dispatcher_unwind(upb_dispatcher *d, upb_flow_t flow);
+// Aborts the parse by calling the exitjmp handler with the source closure.
+// No end handlers are called, and this function never returns.
+void _upb_dispatcher_abortjmp(upb_dispatcher *d) UPB_NORETURN;
 
 INLINE void _upb_dispatcher_sethas(void *_p, int32_t hasbit) {
   char *p = (char*)_p;
-  if (hasbit >= 0) p[hasbit / 8] |= (1 << (hasbit % 8));
+  if (hasbit >= 0) p[(uint32_t)hasbit / 8] |= (1 << ((uint32_t)hasbit % 8));
 }
 
 // Dispatch functions -- call the user handler and handle errors.
@@ -380,11 +374,12 @@ INLINE void upb_dispatch_value(upb_dispatcher *d, upb_fhandlers *f,
   upb_flow_t flow = UPB_CONTINUE;
   if (f->value) flow = f->value(d->top->closure, f->fval, val);
   _upb_dispatcher_sethas(d->top->closure, f->valuehasbit);
-  if (flow != UPB_CONTINUE) _upb_dispatcher_unwind(d, flow);
+  if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d);
 }
 void upb_dispatch_startmsg(upb_dispatcher *d);
 void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status);
-upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d, upb_fhandlers *f);
+upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
+                                               upb_fhandlers *f);
 upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d);
 upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, upb_fhandlers *f);
 upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d);
diff --git a/upb/msg.c b/upb/msg.c
index 78309cf7ed..77521e5f0a 100644
--- a/upb/msg.c
+++ b/upb/msg.c
@@ -86,14 +86,16 @@ void upb_stdmsg_sethas(void *_m, upb_value fval) {
   assert(_m != NULL);
   char *m = _m;
   const upb_fielddef *f = upb_value_getfielddef(fval);
-  if (f->hasbit >= 0) m[f->hasbit / 8] |= (1 << (f->hasbit % 8));
+  if (f->hasbit >= 0)
+    m[(uint32_t)f->hasbit / 8] |= (1 << ((uint32_t)f->hasbit % 8));
 }
 
 bool upb_stdmsg_has(const void *_m, upb_value fval) {
   assert(_m != NULL);
   const char *m = _m;
   const upb_fielddef *f = upb_value_getfielddef(fval);
-  return f->hasbit < 0 || (m[f->hasbit / 8] & (1 << (f->hasbit % 8)));
+  return f->hasbit < 0 ||
+      (m[(uint32_t)f->hasbit / 8] & (1 << ((uint32_t)f->hasbit % 8)));
 }
 
 #define UPB_ACCESSORS(type, ctype)                                            \
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
index ae54e47af8..1b5fc17f54 100644
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@@ -13,14 +13,95 @@
 #include "upb/pb/decoder.h"
 #include "upb/pb/varint.h"
 
+/* upb_decoderplan ************************************************************/
+
 #ifdef UPB_USE_JIT_X64
-#define Dst_DECL upb_decoder *d
-#define Dst_REF (d->dynasm)
-#define Dst (d)
+// These defines are necessary for DynASM codegen.
+// See dynasm/dasm_proto.h for more info.
+#define Dst_DECL upb_decoderplan *plan
+#define Dst_REF (plan->dynasm)
+#define Dst (plan)
+
+// In debug mode, make DynASM do internal checks (must be defined before any
+// dasm header is included).
+#ifndef NDEBUG
+#define DASM_CHECKS
+#endif
+
 #include "dynasm/dasm_proto.h"
 #include "upb/pb/decoder_x64.h"
 #endif
 
+typedef struct {
+  upb_fhandlers base;
+  void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f);
+#ifdef UPB_USE_JIT_X64
+  uint32_t jit_pclabel;
+  uint32_t jit_pclabel_notypecheck;
+#endif
+} upb_dplanfield;
+
+typedef struct {
+  upb_mhandlers base;
+#ifdef UPB_USE_JIT_X64
+  uint32_t jit_startmsg_pclabel;
+  uint32_t jit_endofbuf_pclabel;
+  uint32_t jit_endofmsg_pclabel;
+  uint32_t jit_dyndispatch_pclabel;
+  uint32_t jit_unknownfield_pclabel;
+  int32_t jit_parent_field_done_pclabel;
+  uint32_t max_field_number;
+  // Currently keyed on field number.  Could also try keying it
+  // on encoded or decoded tag, or on encoded field number.
+  void **tablearray;
+#endif
+} upb_dplanmsg;
+
+static void *upb_decoderplan_fptrs[];
+
+void upb_decoderplan_initfhandlers(upb_fhandlers *f) {
+  f->decode = upb_decoderplan_fptrs[f->type];
+}
+
+upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit) {
+  upb_decoderplan *p = malloc(sizeof(*p));
+  p->handlers = h;
+  upb_handlers_ref(h);
+  h->should_jit = allowjit;
+#ifdef UPB_USE_JIT_X64
+  p->jit_code = NULL;
+  if (allowjit) upb_decoderplan_makejit(p);
+#endif
+  // Set function pointers for each field's decode function.
+  for (int i = 0; i < h->msgs_len; i++) {
+    upb_mhandlers *m = h->msgs[i];
+    for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab);
+        !upb_inttable_done(i);
+        i = upb_inttable_next(&m->fieldtab, i)) {
+      upb_itofhandlers_ent *e = upb_inttable_iter_value(i);
+      upb_fhandlers *f = e->f;
+      upb_decoderplan_initfhandlers(f);
+    }
+  }
+  return p;
+}
+
+void upb_decoderplan_unref(upb_decoderplan *p) {
+  // TODO: make truly refcounted.
+  upb_handlers_unref(p->handlers);
+#ifdef UPB_USE_JIT_X64
+  if (p->jit_code) upb_decoderplan_freejit(p);
+#endif
+  free(p);
+}
+
+bool upb_decoderplan_hasjitcode(upb_decoderplan *p) {
+  return p->jit_code != NULL;
+}
+
+
+/* upb_decoder ****************************************************************/
+
 // It's unfortunate that we have to micro-manage the compiler this way,
 // especially since this tuning is necessarily specific to one hardware
 // configuration.  But emperically on a Core i7, performance increases 30-50%
@@ -29,18 +110,17 @@
 #define FORCEINLINE static __attribute__((always_inline))
 #define NOINLINE static __attribute__((noinline))
 
-static void upb_decoder_exit(upb_decoder *d) {
+UPB_NORETURN static void upb_decoder_exitjmp(upb_decoder *d) {
   // Resumable decoder would back out to completed_ptr (and possibly get a
   // previous buffer).
   siglongjmp(d->exitjmp, 1);
 }
-static void upb_decoder_exit2(void *_d) {
-  upb_decoder *d = _d;
-  upb_decoder_exit(d);
+UPB_NORETURN static void upb_decoder_exitjmp2(void *d) {
+  upb_decoder_exitjmp(d);
 }
-static void upb_decoder_abort(upb_decoder *d, const char *msg) {
-  upb_status_seterrliteral(d->status, msg);
-  upb_decoder_exit(d);
+UPB_NORETURN static void upb_decoder_abortjmp(upb_decoder *d, const char *msg) {
+  upb_status_seterrliteral(&d->status, msg);
+  upb_decoder_exitjmp(d);
 }
 
 /* Buffering ******************************************************************/
@@ -50,8 +130,12 @@ static void upb_decoder_abort(upb_decoder *d, const char *msg) {
 // the next one.  When we've committed our progress we discard any previous
 // buffers' regions.
 
-static uint32_t upb_decoder_bufleft(upb_decoder *d) { return d->end - d->ptr; }
-static void upb_decoder_advance(upb_decoder *d, uint32_t len) {
+static size_t upb_decoder_bufleft(upb_decoder *d) {
+  assert(d->end >= d->ptr);
+  return d->end - d->ptr;
+}
+
+static void upb_decoder_advance(upb_decoder *d, size_t len) {
   assert(upb_decoder_bufleft(d) >= len);
   d->ptr += len;
 }
@@ -66,29 +150,49 @@ uint64_t upb_decoder_bufendofs(upb_decoder *d) {
 
 static void upb_decoder_setmsgend(upb_decoder *d) {
   upb_dispatcher_frame *f = d->dispatcher.top;
-  uint32_t delimlen = f->end_ofs - d->bufstart_ofs;
-  uint32_t buflen = d->end - d->buf;
+  size_t delimlen = f->end_ofs - d->bufstart_ofs;
+  size_t buflen = d->end - d->buf;
   d->delim_end = (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) ?
       d->buf + delimlen : NULL;  // NULL if not in this buf.
   d->top_is_packed = f->is_packed;
+  d->dispatch_table = &d->dispatcher.msgent->fieldtab;
 }
 
-static bool upb_trypullbuf(upb_decoder *d) {
-  assert(upb_decoder_bufleft(d) == 0);
-  d->bufstart_ofs = upb_decoder_offset(d);
+static void upb_decoder_skiptonewbuf(upb_decoder *d, uint64_t ofs) {
+  assert(ofs >= upb_decoder_offset(d));
+  if (ofs > upb_byteregion_endofs(d->input))
+    upb_decoder_abortjmp(d, "Unexpected EOF");
   d->buf = NULL;
   d->ptr = NULL;
   d->end = NULL;
-  if (upb_byteregion_available(d->input, upb_decoder_offset(d)) == 0 &&
-      !upb_byteregion_fetch(d->input, d->status)) {
-    if (upb_eof(d->status)) return false;
-    upb_decoder_exit(d);  // Non-EOF error.
+  d->delim_end = NULL;
+#ifdef UPB_USE_JIT_X64
+  d->jit_end = NULL;
+#endif
+  d->bufstart_ofs = ofs;
+}
+
+static bool upb_trypullbuf(upb_decoder *d) {
+  assert(upb_decoder_bufleft(d) == 0);
+  upb_decoder_skiptonewbuf(d, upb_decoder_offset(d));
+  if (upb_byteregion_available(d->input, d->bufstart_ofs) == 0) {
+    switch (upb_byteregion_fetch(d->input)) {
+      case UPB_BYTE_OK:
+        assert(upb_byteregion_available(d->input, d->bufstart_ofs) > 0);
+        break;
+      case UPB_BYTE_EOF: return false;
+      case UPB_BYTE_ERROR: upb_decoder_abortjmp(d, "I/O error in input");
+      // Decoder resuming is not yet supported.
+      case UPB_BYTE_WOULDBLOCK:
+        upb_decoder_abortjmp(d, "Input returned WOULDBLOCK");
+    }
   }
-  uint32_t len;
+  size_t len;
   d->buf = upb_byteregion_getptr(d->input, d->bufstart_ofs, &len);
   assert(len > 0);
   d->ptr = d->buf;
   d->end = d->buf + len;
+  upb_decoder_setmsgend(d);
 #ifdef UPB_USE_JIT_X64
   // If we start parsing a value, we can parse up to 20 bytes without
   // having to bounds-check anything (2 10-byte varints).  Since the
@@ -96,27 +200,29 @@ static bool upb_trypullbuf(upb_decoder *d) {
   // JIT bails if there are not 20 bytes available.
   d->jit_end = d->end - 20;
 #endif
-  upb_decoder_setmsgend(d);
+  assert(upb_decoder_bufleft(d) > 0);
   return true;
 }
 
 static void upb_pullbuf(upb_decoder *d) {
-  if (!upb_trypullbuf(d)) upb_decoder_abort(d, "Unexpected EOF");
+  if (!upb_trypullbuf(d)) upb_decoder_abortjmp(d, "Unexpected EOF");
 }
 
-void upb_decoder_skipto(upb_decoder *d, uint64_t ofs) {
-  if (ofs < upb_decoder_bufendofs(d)) {
+void upb_decoder_checkpoint(upb_decoder *d) {
+  upb_byteregion_discard(d->input, upb_decoder_offset(d));
+}
+
+void upb_decoder_discardto(upb_decoder *d, uint64_t ofs) {
+  if (ofs <= upb_decoder_bufendofs(d)) {
     upb_decoder_advance(d, ofs - upb_decoder_offset(d));
   } else {
-    d->buf = NULL;
-    d->ptr = NULL;
-    d->end = NULL;
-    d->bufstart_ofs = ofs;
+    upb_decoder_skiptonewbuf(d, ofs);
   }
+  upb_decoder_checkpoint(d);
 }
 
-void upb_decoder_checkpoint(upb_decoder *d) {
-  upb_byteregion_discard(d->input, upb_decoder_offset(d));
+void upb_decoder_discard(upb_decoder *d, size_t bytes) {
+  upb_decoder_discardto(d, upb_decoder_offset(d) + bytes);
 }
 
 
@@ -126,15 +232,13 @@ NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d) {
   uint8_t byte = 0x80;
   uint64_t u64 = 0;
   int bitpos;
-  const char *ptr = d->ptr;
   for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
-    if (upb_decoder_bufleft(d) == 0) {
-      upb_pullbuf(d);
-      ptr = d->ptr;
-    }
-    u64 |= ((uint64_t)(byte = *ptr++) & 0x7F) << bitpos;
+    if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d);
+    u64 |= ((uint64_t)(byte = *d->ptr) & 0x7F) << bitpos;
+    upb_decoder_advance(d, 1);
   }
-  if(bitpos == 70 && (byte & 0x80)) upb_decoder_abort(d, "Unterminated varint");
+  if(bitpos == 70 && (byte & 0x80))
+    upb_decoder_abortjmp(d, "Unterminated varint");
   return u64;
 }
 
@@ -151,7 +255,7 @@ FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d) {
   if ((*(p++) & 0x80) == 0) goto done;  // likely
 slow:
   u64 = upb_decode_varint_slow(d);
-  if (u64 > 0xffffffff) upb_decoder_abort(d, "Unterminated 32-bit varint");
+  if (u64 > UINT32_MAX) upb_decoder_abortjmp(d, "Unterminated 32-bit varint");
   ret = (uint32_t)u64;
   p = d->ptr;  // Turn the next line into a nop.
 done:
@@ -174,7 +278,7 @@ FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) {
   if (upb_decoder_bufleft(d) >= 10) {
     // Fast case.
     upb_decoderet r = upb_vdecode_fast(d->ptr);
-    if (r.p == NULL) upb_decoder_abort(d, "Unterminated varint");
+    if (r.p == NULL) upb_decoder_abortjmp(d, "Unterminated varint");
     upb_decoder_advance(d, r.p - d->ptr);
     return r.val;
   } else if (upb_decoder_bufleft(d) > 0) {
@@ -200,11 +304,12 @@ FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) {
   } else {
     // Slow case.
     size_t read = 0;
-    while (read < bytes) {
-      size_t avail = upb_decoder_bufleft(d);
+    while (1) {
+      size_t avail = UPB_MIN(upb_decoder_bufleft(d), bytes - read);
       memcpy(buf + read, d->ptr, avail);
       upb_decoder_advance(d, avail);
       read += avail;
+      if (read == bytes) break;
       upb_pullbuf(d);
     }
   }
@@ -213,26 +318,28 @@ FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) {
 FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) {
   uint32_t u32;
   upb_decode_fixed(d, (char*)&u32, sizeof(uint32_t));
-  return u32;  // TODO: proper byte swapping
+  return u32;  // TODO: proper byte swapping for big-endian machines.
 }
 FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) {
   uint64_t u64;
   upb_decode_fixed(d, (char*)&u64, sizeof(uint64_t));
-  return u64;  // TODO: proper byte swapping
+  return u64;  // TODO: proper byte swapping for big-endian machines.
 }
 
 INLINE upb_byteregion *upb_decode_string(upb_decoder *d) {
   uint32_t strlen = upb_decode_varint32(d);
   uint64_t offset = upb_decoder_offset(d);
+  if (offset + strlen > upb_byteregion_endofs(d->input))
+    upb_decoder_abortjmp(d, "Unexpected EOF");
   upb_byteregion_reset(&d->str_byteregion, d->input, offset, strlen);
   // Could make it an option on the callback whether we fetchall() first or not.
-  upb_byteregion_fetchall(&d->str_byteregion, d->status);
-  if (!upb_ok(d->status)) upb_decoder_exit(d);
-  upb_decoder_skipto(d, offset + strlen);
+  if (upb_byteregion_fetchall(&d->str_byteregion) != UPB_BYTE_OK)
+    upb_decoder_abortjmp(d, "Couldn't fetchall() on string.");
+  upb_decoder_discardto(d, offset + strlen);
   return &d->str_byteregion;
 }
 
-INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint64_t end) {
+INLINE void upb_push_msg(upb_decoder *d, upb_fhandlers *f, uint64_t end) {
   upb_dispatch_startsubmsg(&d->dispatcher, f)->end_ofs = end;
   upb_decoder_setmsgend(d);
 }
@@ -253,8 +360,6 @@ INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint64_t end) {
 
 static double  upb_asdouble(uint64_t n) { double d; memcpy(&d, &n, 8); return d; }
 static float   upb_asfloat(uint32_t n)  { float  f; memcpy(&f, &n, 4); return f; }
-static int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }
-static int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
 
 T(INT32,    varint,  int32,  int32_t)
 T(INT64,    varint,  int64,  int64_t)
@@ -271,9 +376,10 @@ T(FLOAT,    fixed32, float,  upb_asfloat)
 T(SINT32,   varint,  int32,  upb_zzdec_32)
 T(SINT64,   varint,  int64,  upb_zzdec_64)
 T(STRING,   string,  byteregion, upb_byteregion*)
+#undef T
 
 static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) {
-  upb_push(d, f, UPB_NONDELIMITED);
+  upb_push_msg(d, f, UPB_NONDELIMITED);
 }
 static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) {
   (void)f;
@@ -281,15 +387,30 @@ static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) {
   upb_decoder_setmsgend(d);
 }
 static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) {
-  upb_push(d, f, upb_decode_varint32(d) + upb_decoder_offset(d));
+  uint32_t len = upb_decode_varint32(d);
+  upb_push_msg(d, f, upb_decoder_offset(d) + len);
 }
 
+#define F(type) &upb_decode_ ## type
+static void *upb_decoderplan_fptrs[] = {
+    &upb_endgroup, F(DOUBLE), F(FLOAT), F(INT64),
+    F(UINT64), F(INT32), F(FIXED64), F(FIXED32), F(BOOL), F(STRING),
+    F(GROUP), F(MESSAGE), F(STRING), F(UINT32), F(ENUM), F(SFIXED32),
+    F(SFIXED64), F(SINT32), F(SINT64)};
+#undef F
+
 
 /* The main decoding loop *****************************************************/
 
 static void upb_decoder_checkdelim(upb_decoder *d) {
+  // TODO: This doesn't work for the case that no buffer is currently loaded
+  // (i.e. d->buf == NULL) because delim_end is NULL even if we are at
+  // end-of-delim.  Need to add a test that exercises this by putting a buffer
+  // seam in the middle of the final delimited value in a proto that we skip
+  // for some reason (like because it's unknown and we have no unknown field
+  // handler).
   while (d->delim_end != NULL && d->ptr >= d->delim_end) {
-    if (d->ptr > d->delim_end) upb_decoder_abort(d, "Bad submessage end");
+    if (d->ptr > d->delim_end) upb_decoder_abortjmp(d, "Bad submessage end");
     if (d->dispatcher.top->is_sequence) {
       upb_dispatch_endseq(&d->dispatcher);
     } else {
@@ -299,33 +420,36 @@ static void upb_decoder_checkdelim(upb_decoder *d) {
   }
 }
 
-static void upb_decoder_enterjit(upb_decoder *d) {
-  (void)d;
-#ifdef UPB_USE_JIT_X64
-  if (d->jit_code && d->dispatcher.top == d->dispatcher.stack && d->ptr < d->jit_end) {
-    // Decodes as many fields as possible, updating d->ptr appropriately,
-    // before falling through to the slow(er) path.
-    void (*upb_jit_decode)(upb_decoder *d) = (void*)d->jit_code;
-    upb_jit_decode(d);
-  }
-#endif
-}
-
 INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
   while (1) {
     uint32_t tag;
     if (!upb_trydecode_varint32(d, &tag)) return NULL;
     uint8_t wire_type = tag & 0x7;
-    upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag);
+    uint32_t fieldnum = tag >> 3;
+    upb_itofhandlers_ent *e = upb_inttable_fastlookup(
+        d->dispatch_table, fieldnum, sizeof(upb_itofhandlers_ent));
+    upb_fhandlers *f = e ? e->f : NULL;
+
+    if (f) {
+      // Wire type check.
+      if (wire_type == upb_types[f->type].native_wire_type ||
+          (wire_type == UPB_WIRE_TYPE_DELIMITED &&
+           upb_types[f->type].is_numeric)) {
+        // Wire type is ok.
+      } else {
+        f = NULL;
+      }
+    }
 
     // There are no explicit "startseq" or "endseq" markers in protobuf
     // streams, so we have to infer them by noticing when a repeated field
     // starts or ends.
-    if (d->dispatcher.top->is_sequence && d->dispatcher.top->f != f) {
+    upb_dispatcher_frame *fr = d->dispatcher.top;
+    if (fr->is_sequence && fr->f != f) {
       upb_dispatch_endseq(&d->dispatcher);
       upb_decoder_setmsgend(d);
     }
-    if (f && f->repeated && d->dispatcher.top->f != f) {
+    if (f && f->repeated && (!fr->is_sequence || fr->f != f)) {
       uint64_t old_end = d->dispatcher.top->end_ofs;
       upb_dispatcher_frame *fr = upb_dispatch_startseq(&d->dispatcher, f);
       if (wire_type != UPB_WIRE_TYPE_DELIMITED ||
@@ -334,7 +458,8 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
         fr->end_ofs = old_end;
       } else {
         // Packed primitive field.
-        fr->end_ofs = upb_decoder_offset(d) + upb_decode_varint(d);
+        uint32_t len = upb_decode_varint32(d);
+        fr->end_ofs = upb_decoder_offset(d) + len;
         fr->is_packed = true;
       }
       upb_decoder_setmsgend(d);
@@ -343,14 +468,20 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
     if (f) return f;
 
     // Unknown field.
+    if (fieldnum == 0 || fieldnum > UPB_MAX_FIELDNUMBER)
+      upb_decoder_abortjmp(d, "Invalid field number");
     switch (wire_type) {
       case UPB_WIRE_TYPE_VARINT:    upb_decode_varint(d); break;
-      case UPB_WIRE_TYPE_32BIT:     upb_decoder_advance(d, 4); break;
-      case UPB_WIRE_TYPE_64BIT:     upb_decoder_advance(d, 8); break;
+      case UPB_WIRE_TYPE_32BIT:     upb_decoder_discard(d, 4); break;
+      case UPB_WIRE_TYPE_64BIT:     upb_decoder_discard(d, 8); break;
       case UPB_WIRE_TYPE_DELIMITED:
-        upb_decoder_advance(d, upb_decode_varint32(d)); break;
+        upb_decoder_discard(d, upb_decode_varint32(d)); break;
+      case UPB_WIRE_TYPE_START_GROUP:
+        upb_decoder_abortjmp(d, "Can't handle unknown groups yet");
+      case UPB_WIRE_TYPE_END_GROUP:
+        upb_decoder_abortjmp(d, "Unmatched ENDGROUP tag");
       default:
-        upb_decoder_abort(d, "Invalid wire type");
+        upb_decoder_abortjmp(d, "Invalid wire type");
     }
     // TODO: deliver to unknown field callback.
     upb_decoder_checkpoint(d);
@@ -358,16 +489,22 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
   }
 }
 
-void upb_decoder_decode(upb_decoder *d, upb_status *status) {
-  if (sigsetjmp(d->exitjmp, 0)) { assert(!upb_ok(status)); return; }
-  d->status = status;
+upb_success_t upb_decoder_decode(upb_decoder *d) {
+  assert(d->input);
+  if (sigsetjmp(d->exitjmp, 0)) {
+    assert(!upb_ok(&d->status));
+    return UPB_ERROR;
+  }
   upb_dispatch_startmsg(&d->dispatcher);
   // Prime the buf so we can hit the JIT immediately.
   upb_trypullbuf(d);
   upb_fhandlers *f = d->dispatcher.top->f;
-  while(1) { // Main loop: executed once per tag/field pair.
+  while(1) {
     upb_decoder_checkdelim(d);
+#ifdef UPB_USE_JIT_X64
     upb_decoder_enterjit(d);
+    upb_decoder_checkpoint(d);
+#endif
     if (!d->top_is_packed) f = upb_decode_tag(d);
     if (!f) {
       // Sucessful EOF.  We may need to dispatch a top-level implicit frame.
@@ -375,64 +512,46 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) {
         assert(d->dispatcher.top->is_sequence);
         upb_dispatch_endseq(&d->dispatcher);
       }
-      return;
+      return UPB_OK;
     }
     f->decode(d, f);
     upb_decoder_checkpoint(d);
   }
 }
 
-static void upb_decoder_skip(void *_d, upb_dispatcher_frame *f) {
-  upb_decoder *d = _d;
-  if (f->end_ofs != UPB_NONDELIMITED) {
-    upb_decoder_skipto(d, d->dispatcher.top->end_ofs);
-  } else {
-    // TODO: how to support skipping groups?  Dispatcher could drop callbacks,
-    // or it could be special-cased inside the decoder.
-  }
+void upb_decoder_init(upb_decoder *d) {
+  upb_status_init(&d->status);
+  upb_dispatcher_init(&d->dispatcher, &d->status, &upb_decoder_exitjmp2, d);
+  d->plan = NULL;
+  d->input = NULL;
 }
 
-void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
-  upb_dispatcher_init(
-      &d->dispatcher, handlers, upb_decoder_skip, upb_decoder_exit2, d);
-#ifdef UPB_USE_JIT_X64
-  d->jit_code = NULL;
-  if (d->dispatcher.handlers->should_jit) upb_decoder_makejit(d);
-#endif
-  // Set function pointers for each field's decode function.
-  for (int i = 0; i < handlers->msgs_len; i++) {
-    upb_mhandlers *m = handlers->msgs[i];
-    for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
-        i = upb_inttable_next(&m->fieldtab, i)) {
-      upb_fhandlers *f = upb_inttable_iter_value(i);
-#define F(type) &upb_decode_ ## type
-      static void *fptrs[] = {&upb_endgroup, F(DOUBLE), F(FLOAT), F(INT64),
-          F(UINT64), F(INT32), F(FIXED64), F(FIXED32), F(BOOL), F(STRING),
-          F(GROUP), F(MESSAGE), F(STRING), F(UINT32), F(ENUM), F(SFIXED32),
-          F(SFIXED64), F(SINT32), F(SINT64)};
-      f->decode = fptrs[f->type];
-    }
-  }
+void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p, int msg_offset) {
+  assert(msg_offset >= 0);
+  assert(msg_offset < p->handlers->msgs_len);
+  d->plan = p;
+  d->msg_offset = msg_offset;
+  d->input = NULL;
 }
 
-void upb_decoder_reset(upb_decoder *d, upb_byteregion *input, void *closure) {
-  upb_dispatcher_frame *f = upb_dispatcher_reset(&d->dispatcher, closure);
+void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input,
+                            void *closure) {
+  assert(d->plan);
+  upb_dispatcher_frame *f =
+      upb_dispatcher_reset(&d->dispatcher, closure, d->plan->handlers->msgs[0]);
+  upb_status_clear(&d->status);
   f->end_ofs = UPB_NONDELIMITED;
   d->input = input;
-  d->bufstart_ofs = upb_byteregion_startofs(input);
-  d->buf = NULL;
-  d->ptr = NULL;
-  d->end = NULL;        // Force a buffer pull.
-  d->delim_end = NULL;  // But don't let end-of-message get triggered.
   d->str_byteregion.bytesrc = input->bytesrc;
-#ifdef UPB_USE_JIT_X64
-  d->jit_end = NULL;
-#endif
+
+  // Protect against assert in skiptonewbuf().
+  d->bufstart_ofs = 0;
+  d->ptr = NULL;
+  d->buf = NULL;
+  upb_decoder_skiptonewbuf(d, upb_byteregion_startofs(input));
 }
 
 void upb_decoder_uninit(upb_decoder *d) {
-#ifdef UPB_USE_JIT_X64
-  if (d->dispatcher.handlers->should_jit) upb_decoder_freejit(d);
-#endif
   upb_dispatcher_uninit(&d->dispatcher);
+  upb_status_uninit(&d->status);
 }
diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h
index c35bec4f83..13e5774936 100644
--- a/upb/pb/decoder.h
+++ b/upb/pb/decoder.h
@@ -21,15 +21,43 @@
 extern "C" {
 #endif
 
-/* upb_decoder *****************************************************************/
+/* upb_decoderplan ************************************************************/
+
+// A decoderplan contains whatever data structures and generated (JIT-ted) code
+// are necessary to decode protobuf data of a specific type to a specific set
+// of handlers.  By generating the plan ahead of time, we avoid having to
+// redo this work every time we decode.
+//
+// A decoderplan is threadsafe, meaning that it can be used concurrently by
+// different upb_decoders in different threads.  However, the upb_decoders are
+// *not* thread-safe.
+struct _upb_decoderplan;
+typedef struct _upb_decoderplan upb_decoderplan;
+
+// TODO: add parameter for a list of other decoder plans that we can share
+// generated code with.
+upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit);
+void upb_decoderplan_unref(upb_decoderplan *p);
+
+// Returns true if the plan contains JIT-ted code.  This may not be the same as
+// the "allowjit" parameter to the constructor if support for JIT-ting was not
+// compiled in.
+bool upb_decoderplan_hasjitcode(upb_decoderplan *p);
+
+
+/* upb_decoder ****************************************************************/
 
 struct dasm_State;
 
 typedef struct _upb_decoder {
-  upb_byteregion *input;          // Input data (serialized).
-  upb_dispatcher dispatcher;      // Dispatcher to which we push parsed data.
-  upb_status     *status;         // Where we will store any errors that occur.
-  upb_byteregion str_byteregion;  // For passing string data to callbacks.
+  upb_decoderplan *plan;
+  int             msg_offset;      // Which message from the plan is top-level.
+  upb_byteregion  *input;          // Input data (serialized), not owned.
+  upb_dispatcher  dispatcher;      // Dispatcher to which we push parsed data.
+  upb_status      status;          // Where we store errors that occur.
+  upb_byteregion  str_byteregion;  // For passing string data to callbacks.
+
+  upb_inttable    *dispatch_table;
 
   // Current input buffer and its stream offset.
   const char *buf, *ptr, *end;
@@ -37,40 +65,64 @@ typedef struct _upb_decoder {
 
   // End of the delimited region, relative to ptr, or NULL if not in this buf.
   const char *delim_end;
+  // True if the top stack frame represents a packed field.
   bool top_is_packed;
 
 #ifdef UPB_USE_JIT_X64
   // For JIT, which doesn't do bounds checks in the middle of parsing a field.
   const char *jit_end, *effective_end;  // == MIN(jit_end, submsg_end)
-
-  // JIT-generated machine code (else NULL).
-  char *jit_code;
-  size_t jit_size;
-  char *debug_info;
-
-  struct dasm_State *dynasm;
 #endif
 
   // For exiting the decoder on error.
   sigjmp_buf exitjmp;
 } upb_decoder;
 
-// Initializes/uninitializes a decoder for calling into the given handlers
-// or to write into the given msgdef, given its accessors).  Takes a ref
-// on the handlers.
-void upb_decoder_init(upb_decoder *d, upb_handlers *h);
+void upb_decoder_init(upb_decoder *d);
 void upb_decoder_uninit(upb_decoder *d);
 
-// Resets the internal state of an already-allocated decoder.  This puts it in a
-// state where it has not seen any data, and expects the next data to be from
-// the beginning of a new protobuf.  Decoders must be reset before they can be
-// used.  A decoder can be reset multiple times.  "input" must live until the
-// decoder is reset again (or destroyed).
-void upb_decoder_reset(upb_decoder *d, upb_byteregion *input, void *closure);
+// Resets the plan that the decoder will parse from.  "msg_offset" indicates
+// which message from the plan will be used as the top-level message.
+//
+// This will also reset the decoder's input to be uninitialized --
+// upb_decoder_resetinput() must be called before parsing can occur.  The plan
+// must live until the decoder is destroyed or reset to a different plan.
+//
+// Must be called before upb_decoder_resetinput() or upb_decoder_decode().
+void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p, int msg_offset);
+
+// Resets the input of an already-allocated decoder.  This puts it in a state
+// where it has not seen any data, and expects the next data to be from the
+// beginning of a new protobuf.  Decoders must have their input reset before
+// they can be used.  A decoder can have its input reset multiple times.
+// "input" must live until the decoder is destroyed or has its input reset
+// again.  "c" is the closure that will be passed to the handlers.
+//
+// Must be called before upb_decoder_decode().
+void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input, void *c);
+
+// Decodes serialized data (calling handlers as the data is parsed), returning
+// the success of the operation (call upb_decoder_status() for details).
+upb_success_t upb_decoder_decode(upb_decoder *d);
+
+INLINE const upb_status *upb_decoder_status(upb_decoder *d) {
+  return &d->status;
+}
+
+// Implementation details
+
+struct _upb_decoderplan {
+  upb_handlers *handlers;  // owns reference.
+
+#ifdef UPB_USE_JIT_X64
+  // JIT-generated machine code (else NULL).
+  char *jit_code;
+  size_t jit_size;
+  char *debug_info;
 
-// Decodes serialized data (calling handlers as the data is parsed) until error
-// or EOF (see *status for details).
-void upb_decoder_decode(upb_decoder *d, upb_status *status);
+  // This pointer is allocated by dasm_init() and freed by dasm_free().
+  struct dasm_State *dynasm;
+#endif
+};
 
 #ifdef __cplusplus
 }  /* extern "C" */
diff --git a/upb/pb/decoder_x64.dasc b/upb/pb/decoder_x64.dasc
index 75e5b6b46a..807191b8e6 100644
--- a/upb/pb/decoder_x64.dasc
+++ b/upb/pb/decoder_x64.dasc
@@ -4,20 +4,15 @@
 |// Copyright (c) 2011 Google Inc.  See LICENSE for details.
 |// Author: Josh Haberman <jhaberman@gmail.com>
 |//
-|// JIT compiler for upb_decoder on x86.  Given a upb_handlers object,
-|// generates code specialized to parsing the specific message and
-|// calling specific handlers.
+|// JIT compiler for upb_decoder on x86.  Given a upb_decoderplan object (which
+|// contains an embedded set of upb_handlers), generates code specialized to
+|// parsing the specific message and calling specific handlers.
 |//
 |// Since the JIT can call other functions (the JIT'ted code is not a leaf
 |// function) we must respect alignment rules.  On OS X, this means aligning
 |// the stack to 16 bytes.
 
-#define UPB_NONE -1
-#define UPB_MULTIPLE -2
-#define UPB_TOPLEVEL_ONE -3
-
 #include <sys/mman.h>
-#include "dynasm/dasm_proto.h"
 #include "dynasm/dasm_x86.h"
 
 #ifndef MAP_ANONYMOUS
@@ -73,15 +68,15 @@ gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL};
 
 void __attribute__((noinline)) __jit_debug_register_code() { __asm__ __volatile__(""); }
 
-void upb_reg_jit_gdb(upb_decoder *d) {
+void upb_reg_jit_gdb(upb_decoderplan *plan) {
   // Create debug info.
   size_t elf_len = sizeof(upb_jit_debug_elf_file);
-  d->debug_info = malloc(elf_len);
-  memcpy(d->debug_info, upb_jit_debug_elf_file, elf_len);
-  uint64_t *p = (void*)d->debug_info;
-  for (; (void*)(p+1) <= (void*)d->debug_info + elf_len; ++p) {
-    if (*p == 0x12345678) { *p = (uintptr_t)d->jit_code; }
-    if (*p == 0x321) { *p = d->jit_size; }
+  plan->debug_info = malloc(elf_len);
+  memcpy(plan->debug_info, upb_jit_debug_elf_file, elf_len);
+  uint64_t *p = (void*)plan->debug_info;
+  for (; (void*)(p+1) <= (void*)plan->debug_info + elf_len; ++p) {
+    if (*p == 0x12345678) { *p = (uintptr_t)plan->jit_code; }
+    if (*p == 0x321) { *p = plan->jit_size; }
   }
 
   // Register the JIT-ted code with GDB.
@@ -89,7 +84,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
   e->next_entry = __jit_debug_descriptor.first_entry;
   e->prev_entry = NULL;
   if (e->next_entry) e->next_entry->prev_entry = e;
-  e->symfile_addr = d->debug_info;
+  e->symfile_addr = plan->debug_info;
   e->symfile_size = elf_len;
   __jit_debug_descriptor.first_entry = e;
   __jit_debug_descriptor.relevant_entry = e;
@@ -99,12 +94,17 @@ void upb_reg_jit_gdb(upb_decoder *d) {
 
 #else
 
-void upb_reg_jit_gdb(upb_decoder *d) {
-  (void)d;
+void upb_reg_jit_gdb(upb_decoderplan *plan) {
+  (void)plan;
 }
 
 #endif
 
+// Has to be a separate function, otherwise GCC will complain about
+// expressions like (&foo != NULL) because they will never evaluate to
+// false.  The (void) cast keeps NDEBUG builds free of unused warnings.
+static void upb_assert_notnull(void *addr) { (void)addr; assert(addr != NULL); }
+
 |.arch x64
 |.actionlist upb_jit_actionlist
 |.globals UPB_JIT_GLOBAL_
@@ -126,7 +126,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
 |// ALL of the code in this file uses these register allocations.
 |// When we "call" within this file, we do not use regular calling
 |// conventions, but of course when calling to user callbacks we must.
-|.define PTR,       rbx
+|.define PTR,       rbx  // Writing this to DECODER->ptr commits our progress.
 |.define CLOSURE,   r12
 |.type   FRAME,     upb_dispatcher_frame, r13
 |.type   BYTEREGION,upb_byteregion, r14
@@ -134,6 +134,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
 |.type   STDARRAY,  upb_stdarray
 |
 |.macro callp, addr
+|| upb_assert_notnull(addr);
 || if ((uintptr_t)addr < 0xffffffff) {
      |  call   &addr
 || } else {
@@ -191,11 +192,12 @@ void upb_reg_jit_gdb(upb_decoder *d) {
 |  decode_loaded_varint, 0
 |  mov  ecx, edx
 |  shr  ecx, 3
-|  and  edx, 0x7
+|  and  edx, 0x7   // For the type check that will happen later.
 |  cmp  ecx, m->max_field_number  // Bounds-check the field.
 |  ja   ->exit_jit                // In the future; could be unknown label
 || if ((uintptr_t)m->tablearray < 0xffffffff) {
-|    mov  rax, qword [rcx*8 + m->tablearray]  // TODO: support hybrid array/hash tables.
+|    // TODO: support hybrid array/hash tables.
+|    mov  rax, qword [rcx*8 + m->tablearray]
 || } else {
 |    mov64  rax, (uintptr_t)m->tablearray
 |    mov  rax, qword [rax + rcx*8]
@@ -217,8 +219,9 @@ void upb_reg_jit_gdb(upb_decoder *d) {
 |  lea   rax, [FRAME + sizeof(upb_dispatcher_frame)]  // rax for shorter addressing.
 |  cmp   rax, qword DECODER->dispatcher.limit
 |  jae   ->exit_jit  // Frame stack overflow.
-|  mov   qword FRAME:rax->f, f
-|  mov   dword FRAME:rax->end_ofs, end_offset_
+|  mov64 r8, (uintptr_t)f
+|  mov   qword FRAME:rax->f, r8
+|  mov   qword FRAME:rax->end_ofs, end_offset_
 |  mov   byte FRAME:rax->is_sequence, is_sequence_
 |  mov   DECODER->dispatcher.top, rax
 |  mov   FRAME, rax
@@ -294,7 +297,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
 |
 |.macro sethas, reg, hasbit
 || if (hasbit >= 0) {
-|    or   byte [reg + (hasbit / 8)], (1 << (hasbit % 8))
+|    or   byte [reg + ((uint32_t)hasbit / 8)], (1 << ((uint32_t)hasbit % 8))
 || }
 |.endmacro
 
@@ -304,8 +307,9 @@ void upb_reg_jit_gdb(upb_decoder *d) {
 #include "upb/msg.h"
 
 // Decodes the next val into ARG3, advances PTR.
-static void upb_decoder_jit_decodefield(upb_decoder *d, upb_mhandlers *m,
-                                        uint8_t type, size_t tag_size) {
+static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan,
+                                            upb_mhandlers *m,
+                                            uint8_t type, size_t tag_size) {
   // Decode the value into arg 3 for the callback.
   switch (type) {
     case UPB_TYPE(DOUBLE):
@@ -365,9 +369,9 @@ static void upb_decoder_jit_decodefield(upb_decoder *d, upb_mhandlers *m,
       // robust checks.
       |  mov  ecx, dword [PTR + tag_size]
       |  decode_loaded_varint tag_size
-      |  mov  rdi, DECODER->effective_end
+      |  mov  rdi, DECODER->end
       |  sub  rdi, rax
-      |  cmp  ARG3_64, rdi  // if (len > d->effective_end - str)
+      |  cmp  ARG3_64, rdi  // if (len > d->end - str)
       |  ja   ->exit_jit    // Can't deliver, whole string not in buf.
 
       // Update PTR to point past end of string.
@@ -401,8 +405,8 @@ static void upb_decoder_jit_decodefield(upb_decoder *d, upb_mhandlers *m,
 #if 0
 // These appear not to speed things up, but keeping around for
 // further experimentation.
-static void upb_decoder_jit_doappend(upb_decoder *d, uint8_t size,
-                                     upb_fhandlers *f) {
+static void upb_decoderplan_jit_doappend(upb_decoderplan *plan, uint8_t size,
+                                         upb_fhandlers *f) {
   |  mov   eax, STDARRAY:ARG1_64->len
   |  cmp   eax, STDARRAY:ARG1_64->size
   |  jne   >2
@@ -434,18 +438,19 @@ static void upb_decoder_jit_doappend(upb_decoder *d, uint8_t size,
 }
 #endif
 
-static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
+static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
+                                       upb_fhandlers *f) {
   // Call callbacks.
   if (upb_issubmsgtype(f->type)) {
     if (f->type == UPB_TYPE(MESSAGE)) {
       |   mov   rsi, PTR
       |   sub   rsi, DECODER->buf
-      |   add   esi, ARG3_32   // = (d->ptr - d->buf) + delim_len
+      |   add   rsi, ARG3_64   // = (d->ptr - d->buf) + delim_len
     } else {
       assert(f->type == UPB_TYPE(GROUP));
-      |   mov   esi, UPB_NONDELIMITED
+      |   mov   rsi, UPB_NONDELIMITED
     }
-    |  pushframe  f, esi, false
+    |  pushframe  f, rsi, false
 
     // Call startsubmsg handler (if any).
     if (f->startsubmsg) {
@@ -456,15 +461,11 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
       |  mov  CLOSURE, rdx
     }
     |  mov   qword FRAME->closure, CLOSURE
+    // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
+    |  mov   DECODER->ptr, PTR
 
     const upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f);
-    if (sub_m->jit_parent_field_done_pclabel != UPB_MULTIPLE) {
-      |  jmp   =>sub_m->jit_startmsg_pclabel;
-    } else {
-      |  call  =>sub_m->jit_startmsg_pclabel;
-    }
-
-    |=>f->jit_submsg_done_pclabel:
+    |  call  =>sub_m->jit_startmsg_pclabel;
 
     // Call endsubmsg handler (if any).
     if (f->endsubmsg) {
@@ -474,6 +475,8 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
       |  callp f->endsubmsg
     }
     |   popframe upb_fhandlers_getmsg(f)
+    // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
+    |  mov   DECODER->ptr, PTR
   } else {
     |  mov ARG1_64, CLOSURE
     // Test for callbacks we can specialize.
@@ -499,15 +502,15 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
         f->value == &upb_stdmsg_setuint64_r ||
         f->value == &upb_stdmsg_setptr_r ||
         f->value == &upb_stdmsg_setdouble_r) {
-      upb_decoder_jit_doappend(d, 8, f);
+      upb_decoderplan_jit_doappend(plan, 8, f);
     } else if (f->value == &upb_stdmsg_setint32_r ||
                f->value == &upb_stdmsg_setuint32_r ||
                f->value == &upb_stdmsg_setfloat_r) {
-      upb_decoder_jit_doappend(d, 4, f);
+      upb_decoderplan_jit_doappend(plan, 4, f);
     } else if (f->value == &upb_stdmsg_setbool_r) {
-      upb_decoder_jit_doappend(d, 1, f);
+      upb_decoderplan_jit_doappend(plan, 1, f);
 #endif
-    } else {
+    } else if (f->value) {
       // Load closure and fval into arg registers.
       ||#ifndef NDEBUG
       ||// Since upb_value carries type information in debug mode
@@ -519,14 +522,15 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
       |  callp  f->value
     }
     |  sethas CLOSURE, f->valuehasbit
+    // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
+    |  mov   DECODER->ptr, PTR
   }
-  // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
 }
 
 // PTR should point to the beginning of the tag.
-static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag,
-                                  uint32_t next_tag, upb_mhandlers *m,
-                                  upb_fhandlers *f, upb_fhandlers *next_f) {
+static void upb_decoderplan_jit_field(upb_decoderplan *plan, uint64_t tag,
+                                      uint64_t next_tag, upb_mhandlers *m,
+                                      upb_fhandlers *f, upb_fhandlers *next_f) {
   // PC-label for the dispatch table.
   // We check the wire type (which must be loaded in edx) because the
   // table is keyed on field number, not type.
@@ -535,8 +539,8 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag,
   |  jne  ->exit_jit     // In the future: could be an unknown field or packed.
   |=>f->jit_pclabel_notypecheck:
   if (f->repeated) {
-    |  mov   esi, FRAME->end_ofs
-    |  pushframe  f, esi, true
+    |  mov   rsi, FRAME->end_ofs
+    |  pushframe  f, rsi, true
     if (f->startseq) {
       |  mov   ARG1_64, CLOSURE
       |  loadfval f
@@ -555,8 +559,8 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag,
     return;
   }
 
-  upb_decoder_jit_decodefield(d, m, f->type, tag_size);
-  upb_decoder_jit_callcb(d, f);
+  upb_decoderplan_jit_decodefield(plan, m, f->type, tag_size);
+  upb_decoderplan_jit_callcb(plan, f);
 
   // Epilogue: load next tag, check for repeated field.
   |  check_eob   m
@@ -586,13 +590,11 @@ static int upb_compare_uint32(const void *a, const void *b) {
   return *(uint32_t*)a - *(uint32_t*)b;
 }
 
-static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) {
+static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) {
   |=>m->jit_startmsg_pclabel:
+  // There was a call to get here, so we need to align the stack.
+  |  sub  rsp, 8
 
-  if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
-    // There was a call to get here, so we need to align the stack.
-    |  sub  rsp, 8
-  }
   // Call startmsg handler (if any):
   if (m->startmsg) {
     // upb_flow_t startmsg(void *closure);
@@ -615,23 +617,30 @@ static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) {
   int num_keys = upb_inttable_count(&m->fieldtab);
   uint32_t *keys = malloc(num_keys * sizeof(*keys));
   int idx = 0;
-  for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
+  for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab);
+      !upb_inttable_done(i);
       i = upb_inttable_next(&m->fieldtab, i)) {
     keys[idx++] = upb_inttable_iter_key(i);
   }
   qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32);
 
   upb_fhandlers *last_f = NULL;
-  uint32_t last_tag = 0;
+  uint64_t last_encoded_tag = 0;
   for(int i = 0; i < num_keys; i++) {
-    uint32_t key = keys[i];
-    upb_fhandlers *f = upb_inttable_lookup(&m->fieldtab, key);
-    uint32_t tag = upb_vencode32(key);
-    if (last_f) upb_decoder_jit_field(d, last_tag, tag, m, last_f, f);
-    last_tag = tag;
+    uint32_t fieldnum = keys[i];
+    upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, fieldnum);
+    upb_fhandlers *f = e->f;
+    assert(f->number == fieldnum);
+    uint32_t tag = (f->number << 3) | upb_types[f->type].native_wire_type;
+    uint64_t encoded_tag = upb_vencode32(tag);
+    // No tag should be greater than 5 bytes.
+    assert(encoded_tag <= 0xffffffffff);
+    if (last_f) upb_decoderplan_jit_field(
+        plan, last_encoded_tag, encoded_tag, m, last_f, f);
+    last_encoded_tag = encoded_tag;
     last_f = f;
   }
-  upb_decoder_jit_field(d, last_tag, 0, m, last_f, NULL);
+  upb_decoderplan_jit_field(plan, last_encoded_tag, 0, m, last_f, NULL);
 
   free(keys);
 
@@ -655,22 +664,29 @@ static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) {
     |  callp m->endmsg
   }
 
-  if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
-    // Counter previous alignment.
-    |  add  rsp, 8
-    |  ret
-  } else if (m->jit_parent_field_done_pclabel == UPB_TOPLEVEL_ONE) {
-    |  jmp  ->exit_jit
-  } else {
-    |  jmp  =>m->jit_parent_field_done_pclabel
+  if (m->is_group) {
+    // Advance past the "end group" tag.
+    // TODO: Handle UPB_BREAK
+    |  mov   DECODER->ptr, PTR
   }
 
+  // Counter previous alignment.
+  |  add  rsp, 8
+  |  ret
 }
 
-static const char *dbgfmt =
-    "JIT encountered unknown field!  wt=%d, fn=%d\n";
-
-static void upb_decoder_jit(upb_decoder *d) {
+static void upb_decoderplan_jit(upb_decoderplan *plan) {
+  // The JIT prologue/epilogue trampoline that is generated in this function
+  // does not depend on the handlers, so it will never vary.  Ideally we would
+  // put it in an object file and just link it into upb so we could have only a
+  // single copy of it instead of one copy for each decoderplan.  But our
+  // options for doing that are undesirable: GCC inline assembly is
+  // complicated, not portable to other compilers, and comes with subtle
+  // caveats about incorrect things that the optimizer might do if you, e.g.,
+  // execute non-local jumps.  Putting this code in a .s file would force us to
+  // calculate the structure offsets ourselves instead of symbolically
+  // (ie. [r15 + 0xcd] instead of DECODER->ptr).  So we tolerate a bit of
+  // unnecessary duplication/redundancy.
   |  push  rbp
   |  mov   rbp, rsp
   |  push  r15
@@ -686,18 +702,14 @@ static void upb_decoder_jit(upb_decoder *d) {
   |  mov   CLOSURE, FRAME->closure
   |  mov   PTR, DECODER->ptr
 
-  upb_handlers *h = d->dispatcher.handlers;
-  if (h->msgs[0]->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
-    |  call  =>h->msgs[0]->jit_startmsg_pclabel
-    |  jmp   ->exit_jit
-  }
-
   // TODO: push return addresses for re-entry (will be necessary for multiple
   // buffer support).
-  for (int i = 0; i < h->msgs_len; i++) upb_decoder_jit_msg(d, h->msgs[i]);
+  |  call  ARG2_64
 
   |->exit_jit:
-  |  mov   DECODER->ptr, PTR
+  // Restore stack pointer to where it was before any "call" instructions
+  // inside our generated code.
+  |  lea   rsp, [rbp - 48]
   // Counter previous alignment.
   |  add   rsp, 8
   |  pop   rbx
@@ -707,122 +719,128 @@ static void upb_decoder_jit(upb_decoder *d) {
   |  pop   r15
   |  leave
   |  ret
-  |=>0:
-  |  mov rdi, stderr
-  |  mov rsi, dbgfmt
-  |  callp  fprintf
-  |  callp  abort
+
+  upb_handlers *h = plan->handlers;
+  for (int i = 0; i < h->msgs_len; i++)
+    upb_decoderplan_jit_msg(plan, h->msgs[i]);
 }
 
-void upb_decoder_jit_assignfieldlabs(upb_fhandlers *f,
-                                     uint32_t *pclabel_count) {
+static void upb_decoderplan_jit_assignfieldlabs(upb_fhandlers *f,
+                                                uint32_t *pclabel_count) {
   f->jit_pclabel = (*pclabel_count)++;
   f->jit_pclabel_notypecheck = (*pclabel_count)++;
-  f->jit_submsg_done_pclabel = (*pclabel_count)++;
 }
 
-void upb_decoder_jit_assignmsglabs(upb_mhandlers *m, uint32_t *pclabel_count) {
+static void upb_decoderplan_jit_assignmsglabs(upb_mhandlers *m,
+                                              uint32_t *pclabel_count) {
   m->jit_startmsg_pclabel = (*pclabel_count)++;
   m->jit_endofbuf_pclabel = (*pclabel_count)++;
   m->jit_endofmsg_pclabel = (*pclabel_count)++;
   m->jit_dyndispatch_pclabel = (*pclabel_count)++;
   m->jit_unknownfield_pclabel = (*pclabel_count)++;
-  m->jit_parent_field_done_pclabel = UPB_NONE;
   m->max_field_number = 0;
   upb_inttable_iter i;
   for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
       i = upb_inttable_next(&m->fieldtab, i)) {
     uint32_t key = upb_inttable_iter_key(i);
     m->max_field_number = UPB_MAX(m->max_field_number, key);
-    upb_fhandlers *f = upb_inttable_iter_value(i);
-    upb_decoder_jit_assignfieldlabs(f, pclabel_count);
+    upb_itofhandlers_ent *e = upb_inttable_iter_value(i);
+    upb_decoderplan_jit_assignfieldlabs(e->f, pclabel_count);
   }
-  // XXX: Won't work for large field numbers; will need to use a upb_table.
+  // TODO: support large field numbers by either using a hash table or
+  // generating code for a binary search.  For now large field numbers
+  // will just fall back to the table decoder.
+  m->max_field_number = UPB_MIN(m->max_field_number, 16000);
   m->tablearray = malloc((m->max_field_number + 1) * sizeof(void*));
 }
 
-// Second pass: for messages that have only one parent, link them to the field
-// from which they are called.
-void upb_decoder_jit_assignmsglabs2(upb_mhandlers *m) {
-  upb_inttable_iter i;
-  for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
-      i = upb_inttable_next(&m->fieldtab, i)) {
-    upb_fhandlers *f = upb_inttable_iter_value(i);
-    if (upb_issubmsgtype(f->type)) {
-      upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f);
-      if (sub_m->jit_parent_field_done_pclabel == UPB_NONE) {
-        sub_m->jit_parent_field_done_pclabel = f->jit_submsg_done_pclabel;
-      } else {
-        sub_m->jit_parent_field_done_pclabel = UPB_MULTIPLE;
-      }
-    }
-  }
-}
-
-void upb_decoder_makejit(upb_decoder *d) {
-  d->debug_info = NULL;
+static void upb_decoderplan_makejit(upb_decoderplan *plan) {
+  plan->debug_info = NULL;
 
   // Assign pclabels.
-  uint32_t pclabel_count = 1;
-  upb_handlers *h = d->dispatcher.handlers;
+  uint32_t pclabel_count = 0;
+  upb_handlers *h = plan->handlers;
   for (int i = 0; i < h->msgs_len; i++)
-    upb_decoder_jit_assignmsglabs(h->msgs[i], &pclabel_count);
-  for (int i = 0; i < h->msgs_len; i++)
-    upb_decoder_jit_assignmsglabs2(h->msgs[i]);
-
-  if (h->msgs[0]->jit_parent_field_done_pclabel == UPB_NONE) {
-    h->msgs[0]->jit_parent_field_done_pclabel = UPB_TOPLEVEL_ONE;
-  }
+    upb_decoderplan_jit_assignmsglabs(h->msgs[i], &pclabel_count);
 
   void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals));
-  dasm_init(d, 1);
-  dasm_setupglobal(d, globals, UPB_JIT_GLOBAL__MAX);
-  dasm_growpc(d, pclabel_count);
-  dasm_setup(d, upb_jit_actionlist);
+  dasm_init(plan, 1);
+  dasm_setupglobal(plan, globals, UPB_JIT_GLOBAL__MAX);
+  dasm_growpc(plan, pclabel_count);
+  dasm_setup(plan, upb_jit_actionlist);
 
-  upb_decoder_jit(d);
+  upb_decoderplan_jit(plan);
 
-  dasm_link(d, &d->jit_size);
+  int dasm_status = dasm_link(plan, &plan->jit_size);
+  (void)dasm_status;
+  assert(dasm_status == DASM_S_OK);
 
-  d->jit_code = mmap(NULL, d->jit_size, PROT_READ | PROT_WRITE,
-                     MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+  plan->jit_code = mmap(NULL, plan->jit_size, PROT_READ | PROT_WRITE,
+                        MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
 
-  upb_reg_jit_gdb(d);
+  upb_reg_jit_gdb(plan);
 
-  dasm_encode(d, d->jit_code);
+  dasm_encode(plan, plan->jit_code);
 
   // Create dispatch tables.
   for (int i = 0; i < h->msgs_len; i++) {
     upb_mhandlers *m = h->msgs[i];
+    m->jit_func =
+        plan->jit_code + dasm_getpclabel(plan, m->jit_startmsg_pclabel);
     for (uint32_t j = 0; j <= m->max_field_number; j++) {
-      upb_fhandlers *f = NULL;
-      for (int k = 0; k < 8; k++) {
-        f = upb_inttable_lookup(&m->fieldtab, (j << 3) | k);
-        if (f) break;
-      }
+      upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, j);
+      upb_fhandlers *f = e ? e->f : NULL;
       if (f) {
-        m->tablearray[j] = d->jit_code + dasm_getpclabel(d, f->jit_pclabel);
+        m->tablearray[j] =
+            plan->jit_code + dasm_getpclabel(plan, f->jit_pclabel);
       } else {
-        // Don't handle unknown fields yet.
-        m->tablearray[j] = d->jit_code + dasm_getpclabel(d, 0);
+        // TODO: extend the JIT to handle unknown fields.
+        // For the moment we exit the JIT for any unknown field.
+        m->tablearray[j] = globals[UPB_JIT_GLOBAL_exit_jit];
       }
     }
   }
 
-  dasm_free(d);
+  dasm_free(plan);
   free(globals);
 
-  mprotect(d->jit_code, d->jit_size, PROT_EXEC | PROT_READ);
+  mprotect(plan->jit_code, plan->jit_size, PROT_EXEC | PROT_READ);
 
   // View with: objdump -M intel -D -b binary -mi386 -Mx86-64 /tmp/machine-code
   // Or: ndisasm -b 64 /tmp/machine-code
   FILE *f = fopen("/tmp/machine-code", "wb");
-  fwrite(d->jit_code, d->jit_size, 1, f);
+  fwrite(plan->jit_code, plan->jit_size, 1, f);
   fclose(f);
 }
 
-void upb_decoder_freejit(upb_decoder *d) {
-  munmap(d->jit_code, d->jit_size);
-  free(d->debug_info);
+static void upb_decoderplan_freejit(upb_decoderplan *plan) {
+  munmap(plan->jit_code, plan->jit_size);
+  free(plan->debug_info);
   // TODO: unregister
 }
+
+static void upb_decoder_enterjit(upb_decoder *d) {
+  if (d->plan->jit_code &&
+      d->dispatcher.top == d->dispatcher.stack &&
+      d->ptr && d->ptr < d->jit_end) {
+#ifndef NDEBUG
+    register uint64_t rbx asm ("rbx") = 11;
+    register uint64_t r12 asm ("r12") = 12;
+    register uint64_t r13 asm ("r13") = 13;
+    register uint64_t r14 asm ("r14") = 14;
+    register uint64_t r15 asm ("r15") = 15;
+#endif
+    // Decodes as many fields as possible, updating d->ptr appropriately,
+    // before falling through to the slow(er) path.
+    void (*upb_jit_decode)(upb_decoder *d, void*) = (void*)d->plan->jit_code;
+    upb_jit_decode(d, d->plan->handlers->msgs[d->msg_offset]->jit_func);
+    assert(d->ptr <= d->end);
+
+    // Test that callee-save registers were properly restored.
+    assert(rbx == 11);
+    assert(r12 == 12);
+    assert(r13 == 13);
+    assert(r14 == 14);
+    assert(r15 == 15);
+  }
+}
diff --git a/upb/pb/glue.c b/upb/pb/glue.c
index 3176355ac9..4949fe3e24 100644
--- a/upb/pb/glue.c
+++ b/upb/pb/glue.c
@@ -12,8 +12,8 @@
 #include "upb/pb/glue.h"
 #include "upb/pb/textprinter.h"
 
-void upb_strtomsg(const char *str, size_t len, void *msg, const upb_msgdef *md,
-                  upb_status *status) {
+bool upb_strtomsg(const char *str, size_t len, void *msg, const upb_msgdef *md,
+                  bool allow_jit, upb_status *status) {
   upb_stringsrc strsrc;
   upb_stringsrc_init(&strsrc);
   upb_stringsrc_reset(&strsrc, str, len);
@@ -21,13 +21,21 @@ void upb_strtomsg(const char *str, size_t len, void *msg, const upb_msgdef *md,
   upb_decoder d;
   upb_handlers *h = upb_handlers_new();
   upb_accessors_reghandlers(h, md);
-  upb_decoder_init(&d, h);
+  upb_decoderplan *p = upb_decoderplan_new(h, allow_jit);
+  upb_decoder_init(&d);
   upb_handlers_unref(h);
-  upb_decoder_reset(&d, upb_stringsrc_allbytes(&strsrc), msg);
-  upb_decoder_decode(&d, status);
+  upb_decoder_resetplan(&d, p, 0);
+  upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), msg);
+  upb_success_t ret = upb_decoder_decode(&d);
+  // stringsrc and the handlers registered by upb_accessors_reghandlers()
+  // should not suspend.
+  assert((ret == UPB_OK) == upb_ok(upb_decoder_status(&d)));
+  if (status) upb_status_copy(status, upb_decoder_status(&d));
 
   upb_stringsrc_uninit(&strsrc);
   upb_decoder_uninit(&d);
+  upb_decoderplan_unref(p);
+  return ret == UPB_OK;
 }
 
 void *upb_filetonewmsg(const char *fname, const upb_msgdef *md, upb_status *s) {
@@ -35,7 +43,7 @@ void *upb_filetonewmsg(const char *fname, const upb_msgdef *md, upb_status *s) {
   size_t len;
   char *data = upb_readfile(fname, &len);
   if (!data) goto err;
-  upb_strtomsg(data, len, msg, md, s);
+  upb_strtomsg(data, len, msg, md, false, s);
   if (!upb_ok(s)) goto err;
   return msg;
 
@@ -69,7 +77,6 @@ void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
 }
 #endif
 
-// TODO: read->load.
 upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
                                         upb_status *status) {
   upb_stringsrc strsrc;
@@ -79,17 +86,21 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
   upb_handlers *h = upb_handlers_new();
   upb_descreader_reghandlers(h);
 
+  upb_decoderplan *p = upb_decoderplan_new(h, false);
   upb_decoder d;
-  upb_decoder_init(&d, h);
+  upb_decoder_init(&d);
   upb_handlers_unref(h);
   upb_descreader r;
   upb_descreader_init(&r);
-  upb_decoder_reset(&d, upb_stringsrc_allbytes(&strsrc), &r);
+  upb_decoder_resetplan(&d, p, 0);
+  upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), &r);
 
-  upb_decoder_decode(&d, status);
+  upb_success_t ret = upb_decoder_decode(&d);
+  if (status) upb_status_copy(status, upb_decoder_status(&d));
   upb_stringsrc_uninit(&strsrc);
   upb_decoder_uninit(&d);
-  if (!upb_ok(status)) {
+  upb_decoderplan_unref(p);
+  if (ret != UPB_OK) {
     upb_descreader_uninit(&r);
     return NULL;
   }
diff --git a/upb/pb/glue.h b/upb/pb/glue.h
index 38e8d8ec06..ff8c85e535 100644
--- a/upb/pb/glue.h
+++ b/upb/pb/glue.h
@@ -36,8 +36,8 @@ extern "C" {
 
 // Decodes the given string, which must be in protobuf binary format, to the
 // given upb_msg with msgdef "md", storing the status of the operation in "s".
-void upb_strtomsg(const char *str, size_t len, void *msg,
-                  const upb_msgdef *md, upb_status *s);
+bool upb_strtomsg(const char *str, size_t len, void *msg,
+                  const upb_msgdef *md, bool allow_jit, upb_status *s);
 
 // Parses the given file into a new message of the given type.  Caller owns
 // the returned message (or NULL if an error occurred).
diff --git a/upb/pb/varint.h b/upb/pb/varint.h
index 19977e97e9..815a7a1ea2 100644
--- a/upb/pb/varint.h
+++ b/upb/pb/varint.h
@@ -19,6 +19,28 @@
 extern "C" {
 #endif
 
+// The maximum number of bytes that it takes to encode a 64-bit varint.
+// Note that with a better encoding this could be 9 (TODO: write up a
+// wiki document about this).
+#define UPB_PB_VARINT_MAX_LEN 10
+
+/* Zig-zag encoding/decoding **************************************************/
+
+// Zig-zag maps signed integers onto unsigned ones so that values of small
+// magnitude get small encodings: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
+INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }
+INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
+
+// The left shift is done on the unsigned representation because left-shifting
+// a negative signed value is undefined behavior in C.  The sign mask still
+// relies on ">>" of a negative value being an arithmetic shift.
+INLINE uint32_t upb_zzenc_32(int32_t n) {
+  return ((uint32_t)n << 1) ^ (uint32_t)(n >> 31);
+}
+INLINE uint64_t upb_zzenc_64(int64_t n) {
+  return ((uint64_t)n << 1) ^ (uint64_t)(n >> 63);
+}
+
 /* Decoding *******************************************************************/
 
 // All decoding functions return this struct by value.
@@ -56,7 +68,7 @@ done:
 INLINE upb_decoderet upb_vdecode_branch64(const char *p) {
   uint64_t val;
   uint64_t b;
-  upb_decoderet r = {(void*)0, 0};
+  upb_decoderet r = {NULL, 0};
   b = *(p++); val  = (b & 0x7f)      ; if(!(b & 0x80)) goto done;
   b = *(p++); val |= (b & 0x7f) <<  7; if(!(b & 0x80)) goto done;
   b = *(p++); val |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
@@ -124,17 +136,36 @@ INLINE int upb_value_size(uint64_t val) {
   return val == 0 ? 1 : high_bit / 8 + 1;
 }
 
+// Writes "val" to "buf" as a base-128 varint: seven payload bits per byte,
+// least-significant group first, with the high bit set on every byte except
+// the last.  "buf" must have room for at least UPB_PB_VARINT_MAX_LEN bytes.
+// Returns the number of bytes written (1 for val == 0).
+//
+// TODO: benchmark and optimize if necessary.
+INLINE size_t upb_vencode64(uint64_t val, char *buf) {
+  size_t n = 0;
+  do {
+    uint8_t group = val & 0x7f;
+    val >>= 7;
+    if (val != 0) group |= 0x80;  // More groups follow.
+    buf[n++] = group;
+  } while (val != 0);
+  return n;
+}
+
 // Encodes a 32-bit varint, *not* sign-extended.
+// The encoded bytes (at most 5) are copied into the object representation of
+// the returned uint64_t starting at its lowest address; unused bytes are zero.
 INLINE uint64_t upb_vencode32(uint32_t val) {
+  char encoded[UPB_PB_VARINT_MAX_LEN];
+  size_t len = upb_vencode64(val, encoded);
   uint64_t ret = 0;
-  for (int bitpos = 0; val; bitpos+=8, val >>=7) {
-    if (bitpos > 0) ret |= (1 << (bitpos-1));
-    ret |= (val & 0x7f) << bitpos;
-  }
+  assert(len <= 5);
+  memcpy(&ret, encoded, len);
+  assert(ret <= 0xffffffffff);
   return ret;
 }
 
-
 #ifdef __cplusplus
 }  /* extern "C" */
 #endif
diff --git a/upb/table.h b/upb/table.h
index 0786a1afc2..0c0a7854c0 100644
--- a/upb/table.h
+++ b/upb/table.h
@@ -127,6 +127,8 @@ INLINE bool _upb_inttable_isarrkey(const upb_inttable *t, uint32_t k) {
 // We have the caller specify the entry_size because fixing this as a literal
 // (instead of reading table->entry_size) gives the compiler more ability to
 // optimize.
+//
+// Note: All returned pointers are invalidated by inserts!
 INLINE void *_upb_inttable_fastlookup(const upb_inttable *t, uint32_t key,
                                       size_t entry_size, size_t value_size) {
   upb_inttable_value *arrval =
@@ -203,8 +205,11 @@ typedef struct {
 } upb_inttable_iter;
 
 upb_inttable_iter upb_inttable_begin(const upb_inttable *t);
-upb_inttable_iter upb_inttable_next(const upb_inttable *t, upb_inttable_iter iter);
-INLINE bool upb_inttable_done(upb_inttable_iter iter) { return iter.value == NULL; }
+upb_inttable_iter upb_inttable_next(const upb_inttable *t,
+                                    upb_inttable_iter iter);
+INLINE bool upb_inttable_done(upb_inttable_iter iter) {
+  return iter.value == NULL;
+}
 INLINE uint32_t upb_inttable_iter_key(upb_inttable_iter iter) {
   return iter.key;
 }
diff --git a/upb/upb.c b/upb/upb.c
index 5002e10b9c..a3e07e4a3f 100644
--- a/upb/upb.c
+++ b/upb/upb.c
@@ -15,29 +15,32 @@
 #include "upb/bytestream.h"
 
 #define alignof(t) offsetof(struct { char c; t x; }, x)
-#define TYPE_INFO(wire_type, ctype, inmemory_type) \
-    {alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), #ctype},
+#define TYPE_INFO(wire_type, ctype, inmemory_type, is_numeric) \
+    {alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), \
+     #ctype, is_numeric},
 
 const upb_type_info upb_types[] = {
-  TYPE_INFO(UPB_WIRE_TYPE_END_GROUP,   void*,     MESSAGE)   // ENDGROUP (fake)
-  TYPE_INFO(UPB_WIRE_TYPE_64BIT,       double,    DOUBLE)    // DOUBLE
-  TYPE_INFO(UPB_WIRE_TYPE_32BIT,       float,     FLOAT)     // FLOAT
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int64_t,   INT64)     // INT64
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      uint64_t,  UINT64)    // UINT64
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int32_t,   INT32)     // INT32
-  TYPE_INFO(UPB_WIRE_TYPE_64BIT,       uint64_t,  UINT64)    // FIXED64
-  TYPE_INFO(UPB_WIRE_TYPE_32BIT,       uint32_t,  UINT32)    // FIXED32
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      bool,      BOOL)      // BOOL
-  TYPE_INFO(UPB_WIRE_TYPE_DELIMITED,   void*,     STRING)    // STRING
-  TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*,     MESSAGE)   // GROUP
-  TYPE_INFO(UPB_WIRE_TYPE_DELIMITED,   void*,     MESSAGE)   // MESSAGE
-  TYPE_INFO(UPB_WIRE_TYPE_DELIMITED,   void*,     STRING)    // BYTES
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      uint32_t,  UINT32)    // UINT32
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      uint32_t,  INT32)     // ENUM
-  TYPE_INFO(UPB_WIRE_TYPE_32BIT,       int32_t,   INT32)     // SFIXED32
-  TYPE_INFO(UPB_WIRE_TYPE_64BIT,       int64_t,   INT64)     // SFIXED64
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int32_t,   INT32)     // SINT32
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int64_t,   INT64)     // SINT64
+  // END_GROUP is not real, but used to signify the pseudo-field that
+  // ends a group from within the group.
+  TYPE_INFO(UPB_WIRE_TYPE_END_GROUP,   void*,     MESSAGE, false)   // ENDGROUP
+  TYPE_INFO(UPB_WIRE_TYPE_64BIT,       double,    DOUBLE,  true)    // DOUBLE
+  TYPE_INFO(UPB_WIRE_TYPE_32BIT,       float,     FLOAT,   true)    // FLOAT
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int64_t,   INT64,   true)    // INT64
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      uint64_t,  UINT64,  true)    // UINT64
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int32_t,   INT32,   true)    // INT32
+  TYPE_INFO(UPB_WIRE_TYPE_64BIT,       uint64_t,  UINT64,  true)    // FIXED64
+  TYPE_INFO(UPB_WIRE_TYPE_32BIT,       uint32_t,  UINT32,  true)    // FIXED32
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      bool,      BOOL,    true)    // BOOL
+  TYPE_INFO(UPB_WIRE_TYPE_DELIMITED,   void*,     STRING,  false)   // STRING
+  TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*,     MESSAGE, false)   // GROUP
+  TYPE_INFO(UPB_WIRE_TYPE_DELIMITED,   void*,     MESSAGE, false)   // MESSAGE
+  TYPE_INFO(UPB_WIRE_TYPE_DELIMITED,   void*,     STRING,  false)   // BYTES
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      uint32_t,  UINT32,  true)    // UINT32
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      uint32_t,  INT32,   true)    // ENUM
+  TYPE_INFO(UPB_WIRE_TYPE_32BIT,       int32_t,   INT32,   true)    // SFIXED32
+  TYPE_INFO(UPB_WIRE_TYPE_64BIT,       int64_t,   INT64,   true)    // SFIXED64
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int32_t,   INT32,   true)    // SINT32
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int64_t,   INT64,   true)    // SINT64
 };
 
 #ifdef NDEBUG
@@ -66,13 +69,13 @@ void upb_status_seterrf(upb_status *s, const char *msg, ...) {
 }
 
 void upb_status_seterrliteral(upb_status *status, const char *msg) {
-  status->code = UPB_ERROR;
+  status->error = true;
   status->str = msg;
   status->space = NULL;
 }
 
 void upb_status_copy(upb_status *to, const upb_status *from) {
-  to->status = from->status;
+  to->error = from->error;
   to->eof = from->eof;
   to->code = from->code;
   to->space = from->space;
@@ -92,15 +95,20 @@ const char *upb_status_getstr(const upb_status *_status) {
   // Function is logically const but can modify internal state to materialize
   // the string.
   upb_status *status = (upb_status*)_status;
-  if (status->str == NULL && status->space && status->space->code_to_string) {
-    status->space->code_to_string(status->code, status->buf, status->bufsize);
-    status->str = status->buf;
+  if (status->str == NULL && status->space) {
+    if (status->space->code_to_string) {
+      status->space->code_to_string(status->code, status->buf, status->bufsize);
+      status->str = status->buf;
+    } else {
+      upb_status_seterrf(status, "No message, error space=%s, code=%d\n",
+                         status->space->name, status->code);
+    }
   }
   return status->str;
 }
 
 void upb_status_clear(upb_status *status) {
-  status->status = UPB_OK;
+  status->error = false;
   status->eof = false;
   status->code = 0;
   status->space = NULL;
@@ -114,19 +122,50 @@ void upb_status_setcode(upb_status *status, upb_errorspace *space, int code) {
 }
 
 void upb_status_fromerrno(upb_status *status) {
-  if (errno == 0) {
-    status->status = UPB_OK;
-  } else if (errno == EAGAIN || errno == EWOULDBLOCK) {
-    status->status = UPB_WOULDBLOCK;
-  } else {
-    status->status = UPB_ERROR;
+  // Nothing is recorded when errno is 0 or indicates a would-block condition.
+  if (errno != 0 && !upb_errno_is_wouldblock()) {
+    status->error = true;
+    upb_status_setcode(status, &upb_posix_errorspace, errno);
+  }
+}
+
+// Returns true if errno currently holds EAGAIN or EWOULDBLOCK (whichever of
+// the two this platform defines).
+bool upb_errno_is_wouldblock() {
+  return
+#ifdef EAGAIN
+      errno == EAGAIN ||
+#endif
+#ifdef EWOULDBLOCK
+      errno == EWOULDBLOCK ||
+#endif
+      false;
+}
+
+// Renders the POSIX error number "code" into "buf" (capacity "len") as a
+// NULL-terminated message.  Returns false if the message did not fit.
+bool upb_posix_codetostr(int code, char *buf, size_t len) {
+  // Assumes the XSI (int-returning) strerror_r(), which reports failure
+  // either by returning the error number directly or, on older glibc, by
+  // returning -1 and setting errno.
+  int err = strerror_r(code, buf, len);
+  if (err == -1) err = errno;
+  if (err != 0) {
+    if (err == EINVAL) {
+      // snprintf() returns the untruncated length; it fit iff that is < len.
+      int n = snprintf(buf, len, "Invalid POSIX error number %d\n", code);
+      return n >= 0 && (size_t)n < len;
+    } else if (err == ERANGE) {
+      return false;
+    }
+    assert(false);
   }
-  upb_status_setcode(status, &upb_posix_errorspace, errno);
+  return true;
 }
 
-upb_errorspace upb_posix_errorspace = {"POSIX", NULL};  // TODO
+upb_errorspace upb_posix_errorspace = {"POSIX", &upb_posix_codetostr};
 
-int upb_vrprintf(char **buf, uint32_t *size, uint32_t ofs,
+int upb_vrprintf(char **buf, size_t *size, size_t ofs,
                  const char *fmt, va_list args) {
   // Try once without reallocating.  We have to va_copy because we might have
   // to call vsnprintf again.
@@ -141,7 +168,7 @@ int upb_vrprintf(char **buf, uint32_t *size, uint32_t ofs,
     // Need to print again, because some characters were truncated.  vsnprintf
     // will not write the entire string unless you give it space to store the
     // NULL terminator also.
-    while (*size < (ofs + true_len + 1)) *size = UPB_MAX(*size * 2, 2);
+    *size = (ofs + true_len + 1);
     char *newbuf = realloc(*buf, *size);
     if (!newbuf) return -1;
     vsnprintf(newbuf + ofs, true_len + 1, fmt, args);
diff --git a/upb/upb.h b/upb/upb.h
index e43418fbea..d11c7cb15a 100644
--- a/upb/upb.h
+++ b/upb/upb.h
@@ -10,10 +10,12 @@
 #ifndef UPB_H_
 #define UPB_H_
 
-#include <stdbool.h>
-#include <stdint.h>
 #include <assert.h>
 #include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
 #include "descriptor_const.h"
 #include "atomic.h"
 
@@ -26,6 +28,12 @@ extern "C" {
 #define INLINE static inline
 #endif
 
+#ifdef __GNUC__
+#define UPB_NORETURN __attribute__((__noreturn__))
+#else
+#define UPB_NORETURN
+#endif
+
 #define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
 #define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
 #define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m)))
@@ -115,6 +123,7 @@ typedef struct {
   uint8_t native_wire_type;
   uint8_t inmemory_type;    // For example, INT32, SINT32, and SFIXED32 -> INT32
   const char *ctype;
+  bool is_numeric;  // Only numeric types can be packed.
 } upb_type_info;
 
 // A static array of info about all of the field types, indexed by type number.
@@ -176,6 +185,7 @@ typedef struct {
     return val.val.membername; \
   } \
   INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \
+    memset(val, 0, sizeof(*val)); \
     SET_TYPE(val->type, proto_type); \
     val->val.membername = cval; \
   } \
@@ -206,27 +216,31 @@ extern upb_value UPB_NO_VALUE;
 
 /* upb_status *****************************************************************/
 
-enum {
+typedef enum {
   UPB_OK,          // The operation completed successfully.
-  UPB_WOULDBLOCK,  // Stream is nonblocking and the operation would block.
+  UPB_SUSPENDED,   // The operation was suspended and may be resumed later.
   UPB_ERROR,       // An error occurred.
-};
+} upb_success_t;
 
 typedef struct {
   const char *name;
   // Writes a NULL-terminated string to "buf" containing an error message for
   // the given error code, returning false if the message was too large to fit.
-  bool (*code_to_string)(int code, char *buf, uint32_t len);
+  bool (*code_to_string)(int code, char *buf, size_t len);
 } upb_errorspace;
 
 typedef struct {
-  char status;
+  bool error;
   bool eof;
-  int code;   // Can be set to a more specific code (defined by error space).
+
+  // Specific status code defined by some error space (optional).
+  int code;
   upb_errorspace *space;
+
+  // Error message (optional).
   const char *str;  // NULL when no message is present.  NULL-terminated.
   char *buf;        // Owned by the status.
-  uint32_t bufsize;
+  size_t bufsize;
 } upb_status;
 
 #define UPB_STATUS_INIT {UPB_OK, false, 0, NULL, NULL, NULL, 0}
@@ -234,7 +248,7 @@ typedef struct {
 void upb_status_init(upb_status *status);
 void upb_status_uninit(upb_status *status);
 
-INLINE bool upb_ok(const upb_status *status) { return status->code == UPB_OK; }
+INLINE bool upb_ok(const upb_status *status) { return !status->error; }
 INLINE bool upb_eof(const upb_status *status) { return status->eof; }
 
 void upb_status_clear(upb_status *status);
@@ -248,6 +262,7 @@ void upb_status_copy(upb_status *to, const upb_status *from);
 
 extern upb_errorspace upb_posix_errorspace;
 void upb_status_fromerrno(upb_status *status);
+bool upb_errno_is_wouldblock();
 
 // Like vasprintf (which allocates a string large enough for the result), but
 // uses *buf (which can be NULL) as a starting point and reallocates it only if
@@ -255,7 +270,7 @@ void upb_status_fromerrno(upb_status *status);
 // of the buffer.  Starts writing at the given offset into the string; bytes
 // preceding this offset are unaffected.  Returns the new length of the string,
 // or -1 on memory allocation failure.
-int upb_vrprintf(char **buf, uint32_t *size, uint32_t ofs,
+int upb_vrprintf(char **buf, size_t *size, size_t ofs,
                  const char *fmt, va_list args);
 
 #ifdef __cplusplus