protobuf/upb/pb/decoder.c

/*
 * upb - a minimalist implementation of protocol buffers.
 *
 * Copyright (c) 2008-2011 Google Inc.  See LICENSE for details.
 * Author: Josh Haberman <jhaberman@gmail.com>
 */

#include <inttypes.h>
#include <setjmp.h>
#include <stddef.h>
#include <stdlib.h>
#include "upb/bytestream.h"
#include "upb/pb/decoder.h"
#include "upb/pb/varint.h"

#define UPB_NONDELIMITED (0xffffffffffffffffULL)

/* upb_pbdecoder ****************************************************************/

struct dasm_State;

// One entry of the decoder's internal stack.  push() adds a frame for every
// sequence, submessage/group, or string-split-across-buffers that is
// currently open; the pop_*() functions remove it again.
typedef struct {
  // Field this frame was pushed for (set by push()).
  const upb_fielddef *f;
  // Stream offset at which this frame ends, or UPB_NONDELIMITED.
  uint64_t end_ofs;
  uint32_t group_fieldnum;  // UINT32_MAX for non-groups.
  bool is_sequence;   // frame represents seq or submsg/str? (f might be both).
  bool is_packed;     // true for packed primitive sequences.
} frame;

// All state of one protobuf decoder instance: the current input buffer, the
// residual bytes carried between buffers, and the stack of open frames.
struct upb_pbdecoder {
  // Where we push parsed data (not owned).
  upb_sink *sink;

  // Current input buffer and its stream offset.
  // buf..end is the buffer being parsed, ptr is the read position, and
  // checkpoint is the last position committed by checkpoint().
  const char *buf, *ptr, *end, *checkpoint;
  uint64_t bufstart_ofs;

  // Buffer for residual bytes not parsed from the previous buffer.
  char residual[16];
  char *residual_end;

  // Stores the user buffer passed to our decode function.
  const char *buf_param;
  size_t size_param;

  // Equal to size_param while we are in the residual buf, 0 otherwise.
  size_t userbuf_remaining;

  // Used to temporarily store the return value before calling longjmp().
  size_t ret;

  // Position inside the current buffer where the top frame's delimited region
  // ends, or NULL if it does not end in this buf (see set_delim_end()).
  const char *delim_end;

#ifdef UPB_USE_JIT_X64
  // For JIT, which doesn't do bounds checks in the middle of parsing a field.
  const char *jit_end, *effective_end;  // == MIN(jit_end, delim_end)

  // Used momentarily by the generated code to store a value while a user
  // function is called.
  uint32_t tmp_len;

  const void *saved_rbp;
#endif

  // Our internal stack.
  // top points at the innermost open frame; limit is one past the last slot.
  frame *top, *limit;
  frame stack[UPB_MAX_NESTING];

  // For exiting the decoder on error.
  // (Also used by suspendjmp() when the input buffer is exhausted.)
  jmp_buf exitjmp;
};

// Per-handlers decoding plan.  It is allocated by upb_pbdecoder_gethandlers(),
// attached as handler data to the decoder's string handler, and released by
// freeplan().
typedef struct {
  // The top-level handlers that this plan calls into.  We own a ref.
  const upb_handlers *dest_handlers;

#ifdef UPB_USE_JIT_X64
  // JIT-generated machine code (else NULL).
  char *jit_code;
  size_t jit_size;
  char *debug_info;

  // For storing upb_jitmsginfo, which contains per-msg runtime data needed
  // by the JIT.
  // Maps upb_handlers* -> upb_jitmsginfo.
  upb_inttable msginfo;

  // The following members are used only while the JIT is being built.

  // This pointer is allocated by dasm_init() and freed by dasm_free().
  struct dasm_State *dynasm;

  // For storing pclabel bases while we are building the JIT.
  // Maps (upb_handlers* or upb_fielddef*) -> int32 pclabel_base
  upb_inttable pclabels;

  // For marking labels that should go into the generated code.
  // Maps pclabel -> owned char* label.
  upb_inttable asmlabels;

  // This is not the same as len(pclabels) because the table only contains base
  // offsets for each def, but each def can have many pclabels.
  uint32_t pclabel_count;
#endif
} decoderplan;

// Static wire-format facts about one descriptor type.
typedef struct {
  // Wire type a non-packed value of this type is encoded with.
  uint8_t native_wire_type;
  // True for numeric types; decode_tag() accepts these in packed
  // (length-delimited) form as well.
  bool is_numeric;
} upb_decoder_typeinfo;

// Lookup table indexed by upb_descriptortype_t (see decode_tag()).  Entry
// order is significant: each row's trailing comment names the descriptor type
// it describes, and slot 0 is a placeholder for ENDGROUP.
static const upb_decoder_typeinfo upb_decoder_types[] = {
  {UPB_WIRE_TYPE_END_GROUP,   false},  // ENDGROUP
  {UPB_WIRE_TYPE_64BIT,       true},   // DOUBLE
  {UPB_WIRE_TYPE_32BIT,       true},   // FLOAT
  {UPB_WIRE_TYPE_VARINT,      true},   // INT64
  {UPB_WIRE_TYPE_VARINT,      true},   // UINT64
  {UPB_WIRE_TYPE_VARINT,      true},   // INT32
  {UPB_WIRE_TYPE_64BIT,       true},   // FIXED64
  {UPB_WIRE_TYPE_32BIT,       true},   // FIXED32
  {UPB_WIRE_TYPE_VARINT,      true},   // BOOL
  {UPB_WIRE_TYPE_DELIMITED,   false},  // STRING
  {UPB_WIRE_TYPE_START_GROUP, false},  // GROUP
  {UPB_WIRE_TYPE_DELIMITED,   false},  // MESSAGE
  {UPB_WIRE_TYPE_DELIMITED,   false},  // BYTES
  {UPB_WIRE_TYPE_VARINT,      true},   // UINT32
  {UPB_WIRE_TYPE_VARINT,      true},   // ENUM
  {UPB_WIRE_TYPE_32BIT,       true},   // SFIXED32
  {UPB_WIRE_TYPE_64BIT,       true},   // SFIXED64
  {UPB_WIRE_TYPE_VARINT,      true},   // SINT32
  {UPB_WIRE_TYPE_VARINT,      true},   // SINT64
};

// Looks up the selector for handler `type` on field `f`.  The lookup is
// expected to succeed; its result is only checked through UPB_ASSERT_VAR.
static upb_selector_t getselector(const upb_fielddef *f,
                                  upb_handlertype_t type) {
  upb_selector_t sel;
  bool found = upb_handlers_getselector(f, type, &sel);
  UPB_ASSERT_VAR(found, found);
  return sel;
}


/* decoderplan ****************************************************************/

#ifdef UPB_USE_JIT_X64
// These defines are necessary for DynASM codegen.
// See dynasm/dasm_proto.h for more info.
#define Dst_DECL decoderplan *plan
#define Dst_REF (plan->dynasm)
#define Dst (plan)

// In debug mode, make DynASM do internal checks (must be defined before any
// dasm header is included).
#ifndef NDEBUG
#define DASM_CHECKS
#endif

#include "dynasm/dasm_proto.h"
#include "upb/pb/decoder_x64.h"
#endif

void freeplan(void *_p) {
  decoderplan *p = _p;
  upb_handlers_unref(p->dest_handlers, p);
#ifdef UPB_USE_JIT_X64
  if (p->jit_code) upb_decoderplan_freejit(p);
#endif
  free(p);
}

// Returns the decoderplan attached to `h`, or NULL if `h` does not use the
// pbdecoder frame type or the bytestream string selector cannot be resolved.
static const decoderplan *getdecoderplan(const upb_handlers *h) {
  upb_selector_t bytes_sel;
  if (upb_handlers_frametype(h) == upb_pbdecoder_getframetype() &&
      upb_handlers_getselector(UPB_BYTESTREAM_BYTES, UPB_HANDLER_STRING,
                               &bytes_sel)) {
    // The plan was stored as handler data for the string handler.
    return upb_handlers_gethandlerdata(h, bytes_sel);
  }
  return NULL;
}

bool upb_pbdecoder_isdecoder(const upb_handlers *h) {
  return getdecoderplan(h) != NULL;
}

// True iff `h` is a decoder whose plan has JIT-generated code.  Always false
// when the JIT is compiled out.
bool upb_pbdecoder_hasjitcode(const upb_handlers *h) {
#ifdef UPB_USE_JIT_X64
  const decoderplan *plan = getdecoderplan(h);
  return plan != NULL && plan->jit_code != NULL;
#else
  UPB_UNUSED(h);
  return false;
#endif
}

// Returns the handlers the decoder `h` pushes its output to, or NULL if `h`
// has no decoderplan.
const upb_handlers *upb_pbdecoder_getdesthandlers(const upb_handlers *h) {
  const decoderplan *plan = getdecoderplan(h);
  return plan ? plan->dest_handlers : NULL;
}


/* upb_pbdecoder ****************************************************************/

static bool in_residual_buf(const upb_pbdecoder *d, const char *p);

// It's unfortunate that we have to micro-manage the compiler this way,
// especially since this tuning is necessarily specific to one hardware
// configuration.  But empirically on a Core i7, performance increases 30-50%
// with these annotations.  Every instance where these appear, gcc 4.2.1 made
// the wrong decision and degraded performance in benchmarks.
#define FORCEINLINE static inline __attribute__((always_inline))
#define NOINLINE static __attribute__((noinline))

// Returns the status object of the pipeline that owns the decoder's sink;
// decoder errors are recorded there.
static upb_status *decoder_status(upb_pbdecoder *d) {
  // TODO(haberman): encapsulate this access to pipeline->status, but not sure
  // exactly what that interface should look like.
  upb_sink *sink = d->sink;
  return &sink->pipeline_->status_;
}

// Leaves the decoder by jumping back to the _setjmp() point in decode(),
// which then returns d->ret.  Never returns to the caller.
UPB_NORETURN static void exitjmp(upb_pbdecoder *d) {
  _longjmp(d->exitjmp, 1);
}

// Aborts decoding with error `msg`.  The return value of decode() is set to
// the number of bytes of the current buffer consumed up to the last
// checkpoint, or 0 if that checkpoint lies in the residual buffer.  Never
// returns.
UPB_NORETURN static void abortjmp(upb_pbdecoder *d, const char *msg) {
  if (in_residual_buf(d, d->checkpoint)) {
    d->ret = 0;
  } else {
    d->ret = d->checkpoint - d->buf;
  }
  upb_status_seterrliteral(decoder_status(d), msg);
  exitjmp(d);
}

/* Buffering ******************************************************************/

// We operate on one buffer at a time, which is either the user's buffer passed
// to our "decode" callback or some residual bytes from the previous buffer.

// How many bytes can be safely read from d->ptr.
// Number of bytes between d->ptr and the end of the current buffer.
static size_t bufleft(upb_pbdecoder *d) {
  const char *cur = d->ptr;
  const char *limit = d->end;
  assert(limit >= cur);
  return limit - cur;
}

// Overall offset of d->ptr.
// Stream offset of d->ptr: the offset of the buffer start plus the distance
// already consumed within the buffer.
uint64_t offset(const upb_pbdecoder *d) {
  ptrdiff_t consumed = d->ptr - d->buf;
  return d->bufstart_ofs + consumed;
}

// Advances d->ptr.
// Moves d->ptr forward by `len` bytes, which must be available in the current
// buffer.
static void advance(upb_pbdecoder *d, size_t len) {
  assert(len <= bufleft(d));
  d->ptr = d->ptr + len;
}

// Commits d->ptr progress; should be called when an entire atomic value
// (ie tag+value) has been successfully consumed.
// Records the current read position as committed.  Call this once a complete
// atomic value (ie tag+value) has been consumed; abortjmp() and
// getbytes_slow() rewind to this position.
static void checkpoint(upb_pbdecoder *d) {
  const char *committed = d->ptr;
  d->checkpoint = committed;
}

// True iff `p` lies in [buf, end].  Note that `end` itself counts as inside.
static bool in_buf(const char *p, const char *buf, const char *end) {
  if (p < buf) {
    return false;
  }
  return p <= end;
}

// True iff `p` points into the occupied part of the residual buffer
// (d->residual up to and including d->residual_end).
static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
  const char *res_begin = d->residual;
  const char *res_end = d->residual_end;
  return in_buf(p, res_begin, res_end);
}

// Calculates the delim_end value, which represents a combination of the
// current buffer and the stack, so must be called whenever either is updated.
// Recomputes d->delim_end from the top frame and the current buffer, so it
// must be called whenever either is updated.  delim_end is the position in
// the current buffer where the top frame ends, or NULL when the frame is
// non-delimited or ends beyond this buffer.
//
// The distance is computed in 64 bits: end_ofs and bufstart_ofs are 64-bit
// stream offsets, and narrowing their difference to size_t could, where
// size_t is 32 bits, make a far-away frame end appear to be in this buffer.
static void set_delim_end(upb_pbdecoder *d) {
  const frame *f = d->top;
  d->delim_end = NULL;  // NULL if not in this buf.
  if (f->end_ofs == UPB_NONDELIMITED) return;

  uint64_t delimlen = f->end_ofs - d->bufstart_ofs;
  uint64_t buflen = (uint64_t)(d->end - d->buf);
  if (delimlen <= buflen) {
    // delimlen <= buflen, so it fits in size_t.
    d->delim_end = d->buf + (size_t)delimlen;
  }
}

// Makes [buf, end) the current buffer with the read position at its start,
// then refreshes the values derived from the buffer bounds.
static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
  d->buf = buf;
  d->end = end;
  d->ptr = buf;
  set_delim_end(d);
#ifdef UPB_USE_JIT_X64
  // If we start parsing a value, we can parse up to 20 bytes without
  // having to bounds-check anything (2 10-byte varints).  Since the
  // JIT bounds-checks only *between* values (and for strings), the
  // JIT bails if there are not 20 bytes available.
  d->jit_end = end - 20;
#endif
}

// Suspends decoding until more input arrives: makes the residual buffer the
// current buffer and then exits decode() through exitjmp().  Does not return
// to its caller.
static void suspendjmp(upb_pbdecoder *d) {
  switchtobuf(d, d->residual, d->residual_end);
  exitjmp(d);
}

// Moves on to the buffer [buf, buf+len).  The current buffer must be fully
// consumed; its length is added to the stream offset before switching.
static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
  assert(d->ptr == d->end);
  const char *new_end = buf + len;
  d->bufstart_ofs += (d->ptr - d->buf);
  switchtobuf(d, buf, new_end);
}

// Skips `bytes` bytes of input without delivering them to the sink.  Three
// cases: the data is entirely in the current buffer; it spans the residual
// buffer and the user buffer; or it extends beyond everything we currently
// have, in which case decoding is suspended (this path does not return).
static void skip(upb_pbdecoder *d, size_t bytes) {
  size_t avail = bufleft(d);
  size_t total_avail = avail + d->userbuf_remaining;
  if (avail >= bytes) {
    // Skipped data is all in current buffer.
    advance(d, bytes);
  } else if (total_avail >= bytes) {
    // Skipped data is all in residual buf and param buffer.
    assert(in_residual_buf(d, d->ptr));
    advance(d, avail);
    advancetobuf(d, d->buf_param, d->size_param);
    d->userbuf_remaining = 0;
    advance(d, bytes - avail);
  } else {
    // Skipped data extends beyond currently available buffers.
    // TODO: we need to do a checkdelim() equivalent that pops any frames that
    // we just skipped past.
    // Account for the skipped bytes in the stream offset and drop any
    // residual bytes, since they are part of the skipped region.
    d->bufstart_ofs = offset(d) + bytes;
    d->residual_end = d->residual;
    // Raise decode()'s return value by the number of skipped bytes we do not
    // have yet.  NOTE(review): presumably a return larger than the buffer size
    // asks the caller to skip that much upcoming input -- confirm against the
    // bytestream handler contract.
    d->ret += bytes - total_avail;
    suspendjmp(d);
  }
}

// Copies `bytes` bytes from the read position into `buf` and advances past
// them.  The current buffer must hold at least `bytes` bytes.
static void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) {
  assert(bufleft(d) >= bytes);
  const char *src = d->ptr;
  memcpy(buf, src, bytes);
  advance(d, bytes);
}

// Slow path of getbytes(): the current buffer alone cannot supply `bytes`
// bytes.  Either completes the read by crossing from the residual buffer into
// the user buffer, or saves the unparsed bytes since the last checkpoint in
// d->residual and suspends decoding via suspendjmp() (which does not return).
NOINLINE void getbytes_slow(upb_pbdecoder *d, void *buf, size_t bytes) {
  const size_t avail = bufleft(d);
  if (avail + d->userbuf_remaining >= bytes) {
    // Remaining residual buffer and param buffer together can satisfy.
    // (We are only called from getbytes() which has already verified that
    // the current buffer alone cannot satisfy).
    assert(in_residual_buf(d, d->ptr));
    consumebytes(d, buf, avail);
    advancetobuf(d, d->buf_param, d->size_param);
    // Cast before offsetting: arithmetic on void* is a compiler extension,
    // not ISO C.
    consumebytes(d, (char *)buf + avail, bytes - avail);
    d->userbuf_remaining = 0;
  } else {
    // There is not enough remaining data, save residual bytes (if any)
    // starting at the last committed checkpoint and exit.
    if (in_buf(d->checkpoint, d->buf_param, d->buf_param + d->size_param)) {
      // Checkpoint was in user buf; old residual bytes not needed.
      d->ptr = d->checkpoint;
      size_t save = bufleft(d);
      assert(save <= sizeof(d->residual));
      memcpy(d->residual, d->ptr, save);
      d->residual_end = d->residual + save;
      d->bufstart_ofs = offset(d);
    } else {
      // Checkpoint was in residual buf; append user byte(s) to residual buf.
      assert(d->checkpoint == d->residual);
      assert((d->residual_end - d->residual) + d->size_param <=
             sizeof(d->residual));
      if (!in_residual_buf(d, d->ptr)) {
        // We had already moved into the user buffer, so bufstart_ofs was
        // advanced past the residual bytes; rewind it to their start.
        d->bufstart_ofs -= (d->residual_end - d->residual);
      }
      memcpy(d->residual_end, d->buf_param, d->size_param);
      d->residual_end += d->size_param;
    }
    suspendjmp(d);
  }
}

// Reads `bytes` bytes into `buf`.  Uses a plain copy when the current buffer
// has enough data and defers to getbytes_slow() otherwise.
FORCEINLINE void getbytes(upb_pbdecoder *d, void *buf, size_t bytes) {
  if (bytes > bufleft(d)) {
    // Not enough data in the current buffer.
    getbytes_slow(d, buf, bytes);
    return;
  }
  consumebytes(d, buf, bytes);
}

// Reads and returns a single byte of input.
FORCEINLINE uint8_t getbyte(upb_pbdecoder *d) {
  uint8_t b;
  getbytes(d, &b, sizeof(b));
  return b;
}


/* Decoding of wire types *****************************************************/

// Decodes a varint one byte at a time, so it works across buffer seams.
// Reads at most 10 bytes; aborts with "Unterminated varint" if the tenth byte
// still has its continuation bit set.
NOINLINE uint64_t decode_varint_slow(upb_pbdecoder *d) {
  uint64_t val = 0;
  uint8_t b = 0x80;  // Continuation bit set so that the loop is entered.
  int shift = 0;
  while (shift < 70 && (b & 0x80)) {
    b = getbyte(d);
    val |= (uint64_t)(b & 0x7F) << shift;
    shift += 7;
  }
  if (shift == 70 && (b & 0x80)) {
    abortjmp(d, "Unterminated varint");
  }
  return val;
}

// Decodes a varint that must fit in 32 bits; aborts if the value exceeds
// UINT32_MAX.
NOINLINE uint32_t decode_v32_slow(upb_pbdecoder *d) {
  const uint64_t wide = decode_varint_slow(d);
  if (wide > UINT32_MAX) {
    abortjmp(d, "Unterminated 32-bit varint");
  }
  return (uint32_t)wide;
}

// For tags and delimited lengths, which must be <=32bit and are usually small.
// For tags and delimited lengths, which must be <=32bit and are usually small.
// One- and two-byte varints are decoded inline; anything longer, or anything
// too close to the end of the buffer, goes through decode_v32_slow().
FORCEINLINE uint32_t decode_v32(upb_pbdecoder *d) {
  if (bufleft(d) < 2) {
    return decode_v32_slow(d);
  }

  // Nearly all tags are 1 byte (field numbers 1-15) or 2 bytes (16-2047).
  const char *p = d->ptr;
  uint32_t ret = p[0] & 0x7f;
  if (!(p[0] & 0x80)) {
    advance(d, 1);
    return ret;
  }
  ret |= (p[1] & 0x7f) << 7;
  if (!(p[1] & 0x80)) {
    advance(d, 2);
    return ret;
  }
  // Three or more bytes; nothing has been consumed yet.
  return decode_v32_slow(d);
}

// Decodes a 64-bit varint.  With at least 10 bytes in the current buffer the
// fast decoder is used; otherwise the byte-wise slow path handles a varint
// that may span a buffer seam.
FORCEINLINE uint64_t decode_varint(upb_pbdecoder *d) {
  if (bufleft(d) < 10) {
    // Slow case -- varint spans buffer seam.
    return decode_varint_slow(d);
  }

  // Fast case.
  upb_decoderet r = upb_vdecode_fast(d->ptr);
  if (r.p == NULL) {
    abortjmp(d, "Unterminated varint");
  }
  advance(d, r.p - d->ptr);
  return r.val;
}

// Reads a 4-byte fixed-width value in host byte order.
FORCEINLINE uint32_t decode_fixed32(upb_pbdecoder *d) {
  uint32_t val;
  getbytes(d, &val, sizeof(val));
  // TODO: proper byte swapping for big-endian machines.
  return val;
}

// Reads an 8-byte fixed-width value in host byte order.
FORCEINLINE uint64_t decode_fixed64(upb_pbdecoder *d) {
  uint64_t val;
  getbytes(d, &val, sizeof(val));
  // TODO: proper byte swapping for big-endian machines.
  return val;
}

// Pushes a new frame onto the decoder stack and recomputes delim_end.  Aborts
// with "Nesting too deep." when the stack is full.  A group_fieldnum of -1 is
// stored as UINT32_MAX, the marker for non-groups.
static void push(upb_pbdecoder *d, const upb_fielddef *f, bool is_sequence,
                 bool is_packed, int32_t group_fieldnum, uint64_t end) {
  frame *next = d->top + 1;
  if (next >= d->limit) {
    abortjmp(d, "Nesting too deep.");
  }

  next->f = f;
  next->end_ofs = end;
  next->group_fieldnum = group_fieldnum;
  next->is_sequence = is_sequence;
  next->is_packed = is_packed;

  d->top = next;
  set_delim_end(d);
}

// Signals the start of a submessage to the sink and pushes its frame.  An
// `end` of UPB_NONDELIMITED denotes a group, whose frame remembers the field
// number so the matching ENDGROUP tag can be verified.
static void push_msg(upb_pbdecoder *d, const upb_fielddef *f, uint64_t end) {
  upb_selector_t sel = getselector(f, UPB_HANDLER_STARTSUBMSG);
  if (!upb_sink_startsubmsg(d->sink, sel)) {
    abortjmp(d, "startsubmsg failed.");
  }

  int32_t group_fieldnum = -1;
  if (end == UPB_NONDELIMITED) {
    group_fieldnum = (int32_t)upb_fielddef_number(f);
  }
  push(d, f, false, false, group_fieldnum, end);
}

// Signals the start of a sequence (repeated field) to the sink and pushes a
// sequence frame that ends at stream offset `end_ofs`.
static void push_seq(upb_pbdecoder *d, const upb_fielddef *f, bool packed,
                     uint64_t end_ofs) {
  upb_selector_t sel = getselector(f, UPB_HANDLER_STARTSEQ);
  if (!upb_sink_startseq(d->sink, sel)) {
    abortjmp(d, "startseq failed.");
  }
  push(d, f, true, packed, -1, end_ofs);
}

// Signals the start of a string of `len` bytes to the sink and pushes a frame
// for it ending at stream offset `end`.  Used when the string does not fit in
// the current buffer and has to be delivered in pieces.
static void push_str(upb_pbdecoder *d, const upb_fielddef *f, size_t len,
                     uint64_t end) {
  // The message names the call that failed (it used to read "startseq").
  if (!upb_sink_startstr(d->sink, getselector(f, UPB_HANDLER_STARTSTR), len))
    abortjmp(d, "startstr failed.");
  push(d, f, false, false, -1, end);
}

// Signals the end of the current submessage to the sink and pops its frame.
static void pop_submsg(upb_pbdecoder *d) {
  upb_selector_t sel = getselector(d->top->f, UPB_HANDLER_ENDSUBMSG);
  upb_sink_endsubmsg(d->sink, sel);
  --d->top;
  set_delim_end(d);
}

// Signals the end of the current sequence to the sink and pops its frame.
static void pop_seq(upb_pbdecoder *d) {
  upb_selector_t sel = getselector(d->top->f, UPB_HANDLER_ENDSEQ);
  upb_sink_endseq(d->sink, sel);
  --d->top;
  set_delim_end(d);
}

// Signals the end of the current string to the sink and pops its frame.
static void pop_string(upb_pbdecoder *d) {
  upb_selector_t sel = getselector(d->top->f, UPB_HANDLER_ENDSTR);
  upb_sink_endstr(d->sink, sel);
  --d->top;
  set_delim_end(d);
}

// Pops every frame whose delimited region ends at or before the read position
// in the current buffer.  Sequence frames are closed with pop_seq(), all
// others with pop_submsg().
static void checkdelim(upb_pbdecoder *d) {
  for (;;) {
    if (d->delim_end == NULL || d->ptr < d->delim_end) {
      return;
    }
    // TODO(haberman): overrunning the delimiter (d->ptr > d->delim_end, "Bad
    // submessage end") is not reported.  Detecting it here could violate the
    // promise that errors are always signaled by a short "parsed byte" count,
    // because all bytes might have been parsed successfully before the error
    // is noticed.
    if (d->top->is_sequence) {
      pop_seq(d);
    } else {
      pop_submsg(d);
    }
  }
}


/* Decoding of .proto types ***************************************************/

// Technically, we are losing data if we see a 32-bit varint that is not
// properly sign-extended.  We could detect this and error about the data loss,
// but proto2 does not do this, so we pass.

// Defines decode_<type>(): reads one value with decode_<wt>(), converts it
// with `convfunc` (a cast type or a function), and delivers it through
// upb_sink_put<name>() using the UPB_HANDLER_<sel> selector of the field.
#define T(type, sel, wt, name, convfunc) \
  static void decode_ ## type(upb_pbdecoder *d, const upb_fielddef *f) { \
    upb_sink_put ## name(d->sink, getselector(f, UPB_HANDLER_ ## sel), \
                         (convfunc)(decode_ ## wt(d))); \
  } \

// Reinterprets the bits of `n` as a double (no numeric conversion).
static double upb_asdouble(uint64_t n) {
  double d;
  memcpy(&d, &n, sizeof(d));
  return d;
}

// Reinterprets the bits of `n` as a float (no numeric conversion).
static float upb_asfloat(uint32_t n) {
  float f;
  memcpy(&f, &n, sizeof(f));
  return f;
}

// One decode_<TYPE>() function per primitive descriptor type.  Columns:
// descriptor type, handler selector, wire decoder, sink put-function suffix,
// and conversion applied to the raw wire value.
T(INT32,    INT32,  varint,  int32,  int32_t)
T(INT64,    INT64,  varint,  int64,  int64_t)
T(UINT32,   UINT32, varint,  uint32, uint32_t)
T(UINT64,   UINT64, varint,  uint64, uint64_t)
T(FIXED32,  UINT32, fixed32, uint32, uint32_t)
T(FIXED64,  UINT64, fixed64, uint64, uint64_t)
T(SFIXED32, INT32,  fixed32, int32,  int32_t)
T(SFIXED64, INT64,  fixed64, int64,  int64_t)
T(BOOL,     BOOL,   varint,  bool,   bool)
T(ENUM,     INT32,  varint,  int32,  int32_t)
T(DOUBLE,   DOUBLE, fixed64, double, upb_asdouble)
T(FLOAT,    FLOAT,  fixed32, float,  upb_asfloat)
T(SINT32,   INT32,  varint,  int32,  upb_zzdec_32)
T(SINT64,   INT64,  varint,  int64,  upb_zzdec_64)
#undef T

// Starts a group.  Groups carry no length prefix, so the frame is pushed as
// non-delimited and ends only at the matching ENDGROUP tag.
static void decode_GROUP(upb_pbdecoder *d, const upb_fielddef *f) {
  const uint64_t no_end = UPB_NONDELIMITED;
  push_msg(d, f, no_end);
}

// Starts a length-delimited submessage: reads the length prefix and pushes a
// frame that ends that many bytes past the current stream offset.
static void decode_MESSAGE(upb_pbdecoder *d, const upb_fielddef *f) {
  const uint32_t len = decode_v32(d);
  const uint64_t end_ofs = offset(d) + len;
  push_msg(d, f, end_ofs);
}

// Decodes a string/bytes field.  If the whole value is in the current buffer
// it is delivered as startstr/putstring/endstr in one go.  Otherwise a string
// frame is pushed, the part that is available is delivered, and decoding is
// suspended; decode() delivers the rest when the next buffer arrives.
static void decode_STRING(upb_pbdecoder *d, const upb_fielddef *f) {
  // Named `len`, not `strlen`, so the libc function is not shadowed.
  uint32_t len = decode_v32(d);
  if (len <= bufleft(d)) {
    // A failed startstr is fatal here, exactly as it is in push_str().
    if (!upb_sink_startstr(d->sink, getselector(f, UPB_HANDLER_STARTSTR), len))
      abortjmp(d, "startstr failed.");
    if (len)
      upb_sink_putstring(d->sink, getselector(f, UPB_HANDLER_STRING),
                         d->ptr, len);
    upb_sink_endstr(d->sink, getselector(f, UPB_HANDLER_ENDSTR));
    advance(d, len);
  } else {
    // Buffer ends in the middle of the string; need to push a decoder frame
    // for it.
    push_str(d, f, len, offset(d) + len);
    if (bufleft(d)) {
      upb_sink_putstring(d->sink, getselector(f, UPB_HANDLER_STRING),
                         d->ptr, bufleft(d));
      advance(d, bufleft(d));
    }
    // Everything available has been delivered: carry the stream offset
    // forward and leave the residual buffer empty before suspending.
    d->bufstart_ofs = offset(d);
    d->residual_end = d->residual;
    suspendjmp(d);
  }
}


/* The main decoding loop *****************************************************/

// Reads tags until one names a known field with an acceptable wire type and
// returns that field.  Along the way it opens and closes sequence frames as
// repeated fields begin and end, skips unknown fields, and handles ENDGROUP.
// Only returns non-NULL fields; errors leave through abortjmp().
static const upb_fielddef *decode_tag(upb_pbdecoder *d) {
  while (1) {
    uint32_t tag = decode_v32(d);
    // Low three bits are the wire type, the rest is the field number.
    uint8_t wire_type = tag & 0x7;
    uint32_t fieldnum = tag >> 3; const upb_fielddef *f = NULL;
    const upb_handlers *h = d->sink->top->h;  // TODO(haberman): rm
    f = upb_msgdef_itof(upb_handlers_msgdef(h), fieldnum);
    bool packed = false;

    if (f) {
      // Wire type check.
      upb_descriptortype_t type = upb_fielddef_descriptortype(f);
      if (wire_type == upb_decoder_types[type].native_wire_type) {
        // Wire type is ok.
      } else if ((wire_type == UPB_WIRE_TYPE_DELIMITED &&
                 upb_decoder_types[type].is_numeric)) {
        // Wire type is ok (and packed).
        packed = true;
      } else {
        // Wire type mismatch: treat as an unknown field.
        f = NULL;
      }
    }

    // There are no explicit "startseq" or "endseq" markers in protobuf
    // streams, so we have to infer them by noticing when a repeated field
    // starts or ends.
    frame *fr = d->top;
    if (fr->is_sequence && fr->f != f) {
      pop_seq(d);
      fr = d->top;
    }

    if (f && upb_fielddef_isseq(f) && !fr->is_sequence) {
      if (packed) {
        // Packed sequences are length-prefixed; the frame ends after `len`
        // bytes.
        uint32_t len = decode_v32(d);
        push_seq(d, f, true, offset(d) + len);
        checkpoint(d);
      } else {
        // A non-packed sequence inherits the end of the enclosing frame.
        push_seq(d, f, false, fr->end_ofs);
      }
    }

    if (f) return f;

    // Unknown field or ENDGROUP.
    if (fieldnum == 0 || fieldnum > UPB_MAX_FIELDNUMBER)
      abortjmp(d, "Invalid field number");
    switch (wire_type) {
      case UPB_WIRE_TYPE_VARINT:    decode_varint(d); break;
      case UPB_WIRE_TYPE_32BIT:     skip(d, 4); break;
      case UPB_WIRE_TYPE_64BIT:     skip(d, 8); break;
      case UPB_WIRE_TYPE_DELIMITED: skip(d, decode_v32(d)); break;
      case UPB_WIRE_TYPE_START_GROUP:
        // abortjmp() does not return, so there is no fallthrough.
        abortjmp(d, "Can't handle unknown groups yet");
      case UPB_WIRE_TYPE_END_GROUP:
        if (fieldnum != fr->group_fieldnum)
          abortjmp(d, "Unmatched ENDGROUP tag");
        pop_submsg(d);
        break;
      default:
        abortjmp(d, "Invalid wire type");
    }
    // TODO: deliver to unknown field callback.
    checkpoint(d);
    checkdelim(d);
  }
}

// Start-of-string handler for the bytestream: begins the top-level message on
// the destination sink and returns the decoder as the closure for the
// handlers that follow.
void *start(void *closure, const void *handler_data, size_t size_hint) {
  upb_pbdecoder *d = closure;
  UPB_UNUSED(handler_data);
  UPB_UNUSED(size_hint);
  assert(d);
  assert(d->sink);
  upb_sink_startmsg(d->sink);
  return d;
}

bool end(void *closure, const void *handler_data) {
  UPB_UNUSED(handler_data);
  upb_pbdecoder *d = closure;

  if (d->residual_end > d->residual) {
    // We have preserved bytes.
    upb_status_seterrliteral(decoder_status(d), "Unexpected EOF");
    return false;
  }

  // We may need to dispatch a top-level implicit frame.
  if (d->top == d->stack + 1 &&
      d->top->is_sequence &&
      !d->top->is_packed) {
    pop_seq(d);
  }
  if (d->top != d->stack) {
    upb_status_seterrliteral(
        decoder_status(d), "Ended inside delimited field.");
    return false;
  }
  upb_sink_endmsg(d->sink);
  return true;
}

// String handler for the bytestream: decodes the `size` bytes at `buf` and
// pushes the parsed events to d->sink.  `hd` is the decoderplan registered
// together with this handler.
//
// Returns d->ret.  It starts out as `size`, is overwritten by abortjmp() on
// error, and is raised by skip() when skipped data extends past this buffer.
// The main loop has no normal exit: it is left through a longjmp to the
// _setjmp() below, either on error or when the input is exhausted.
size_t decode(void *closure, const void *hd, const char *buf, size_t size) {
  upb_pbdecoder *d = closure;
  const decoderplan *plan = hd;
  UPB_UNUSED(plan);
  assert(d->sink->top->h == plan->dest_handlers);

  if (size == 0) return 0;
  // Assume we'll consume the whole buffer unless this is overwritten.
  d->ret = size;
  d->buf_param = buf;
  d->size_param = size;

  if (_setjmp(d->exitjmp)) {
    // Hit end-of-buffer or error.
    return d->ret;
  }

  if (d->residual_end > d->residual) {
    // We have residual bytes from the last buffer.
    // Keep parsing from the residual buffer; the user buffer is entered
    // later by skip()/getbytes_slow().
    d->userbuf_remaining = d->size_param;
  } else {
    d->userbuf_remaining = 0;
    advancetobuf(d, buf, d->size_param);

    if (d->top != d->stack &&
        upb_fielddef_isstring(d->top->f) &&
        !d->top->is_sequence) {
      // Last buffer ended in the middle of a string; deliver more of it.
      size_t len = d->top->end_ofs - offset(d);
      if (d->size_param >= len) {
        // The rest of the string is in this buffer.
        upb_sink_putstring(d->sink, getselector(d->top->f, UPB_HANDLER_STRING),
                           d->ptr, len);
        advance(d, len);
        pop_string(d);
      } else {
        // The whole buffer is string data; consume it all and wait for more.
        upb_sink_putstring(d->sink, getselector(d->top->f, UPB_HANDLER_STRING),
                           d->ptr, d->size_param);
        advance(d, d->size_param);
        d->residual_end = d->residual;
        advancetobuf(d, d->residual, 0);
        return d->size_param;
      }
    }
  }
  checkpoint(d);

  // While inside a packed sequence no tags are read, so f keeps naming the
  // field of the frame we are in.
  const upb_fielddef *f = d->top->f;
  while(1) {
#ifdef UPB_USE_JIT_X64
    upb_decoder_enterjit(d, plan);
    checkpoint(d);
    set_delim_end(d);  // JIT doesn't keep this current.
#endif
    checkdelim(d);
    if (!d->top->is_packed) {
      f = decode_tag(d);
    }

    // Dispatch on the field's descriptor type to decode one value.
    switch (upb_fielddef_descriptortype(f)) {
      case UPB_DESCRIPTOR_TYPE_DOUBLE:   decode_DOUBLE(d, f);   break;
      case UPB_DESCRIPTOR_TYPE_FLOAT:    decode_FLOAT(d, f);    break;
      case UPB_DESCRIPTOR_TYPE_INT64:    decode_INT64(d, f);    break;
      case UPB_DESCRIPTOR_TYPE_UINT64:   decode_UINT64(d, f);   break;
      case UPB_DESCRIPTOR_TYPE_INT32:    decode_INT32(d, f);    break;
      case UPB_DESCRIPTOR_TYPE_FIXED64:  decode_FIXED64(d, f);  break;
      case UPB_DESCRIPTOR_TYPE_FIXED32:  decode_FIXED32(d, f);  break;
      case UPB_DESCRIPTOR_TYPE_BOOL:     decode_BOOL(d, f);     break;
      case UPB_DESCRIPTOR_TYPE_STRING:   UPB_FALLTHROUGH_INTENDED;
      case UPB_DESCRIPTOR_TYPE_BYTES:    decode_STRING(d, f);   break;
      case UPB_DESCRIPTOR_TYPE_GROUP:    decode_GROUP(d, f);    break;
      case UPB_DESCRIPTOR_TYPE_MESSAGE:  decode_MESSAGE(d, f);  break;
      case UPB_DESCRIPTOR_TYPE_UINT32:   decode_UINT32(d, f);   break;
      case UPB_DESCRIPTOR_TYPE_ENUM:     decode_ENUM(d, f);     break;
      case UPB_DESCRIPTOR_TYPE_SFIXED32: decode_SFIXED32(d, f); break;
      case UPB_DESCRIPTOR_TYPE_SFIXED64: decode_SFIXED64(d, f); break;
      case UPB_DESCRIPTOR_TYPE_SINT32:   decode_SINT32(d, f);   break;
      case UPB_DESCRIPTOR_TYPE_SINT64:   decode_SINT64(d, f);   break;
    }
    // A complete tag+value has been consumed; commit it.
    checkpoint(d);
  }
}

// Frame-type init callback: sets up the members that stay constant for the
// lifetime of the decoder.
void init(void *_d, upb_pipeline *p) {
  upb_pbdecoder *d = _d;
  UPB_UNUSED(p);
  d->sink = NULL;
  d->limit = d->stack + UPB_MAX_NESTING;
  // reset() must be called before decoding; this is guaranteed by assert() in
  // start().
}

void reset(void *_d) {
  upb_pbdecoder *d = _d;
  d->top = d->stack;
  d->top->is_sequence = false;
  d->top->is_packed = false;
  d->top->group_fieldnum = UINT32_MAX;
  d->top->end_ofs = UPB_NONDELIMITED;
  d->bufstart_ofs = 0;
  d->ptr = d->residual;
  d->buf = d->residual;
  d->end = d->residual;
  d->residual_end = d->residual;
}

// Resets the decoder and points it at `sink` as the destination for parsed
// data.  Currently always returns true.
bool upb_pbdecoder_resetsink(upb_pbdecoder *d, upb_sink* sink) {
  // TODO(haberman): typecheck the sink, and test whether the decoder is in the
  // middle of decoding.  Return false if either assumption is violated.
  reset(d);
  d->sink = sink;
  return true;
}

// Frame type describing decoder instances: the size of a upb_pbdecoder plus
// the init() and reset() callbacks defined above.
// NOTE(review): the NULL third slot is presumably an unused uninit/cleanup
// hook -- confirm against the upb_frametype definition.
const upb_frametype upb_pbdecoder_frametype = {
  sizeof(upb_pbdecoder),
  init,
  NULL,
  reset,
};

// Returns the frame type shared by all protobuf decoders.  Declared with an
// explicit (void) so the definition is a real prototype rather than an
// obsolescent unspecified-parameter declarator.
const upb_frametype *upb_pbdecoder_getframetype(void) {
  return &upb_pbdecoder_frametype;
}

// Creates bytestream handlers that decode protobuf data and push the result
// to `dest`, which must be frozen.  The returned handlers are created with
// `owner` as their owner.  A decoderplan holding a ref on `dest` is attached
// to the string handler and is freed by freeplan().  `allowjit` is only
// honored when the JIT is compiled in.
//
// Returns NULL if the plan cannot be allocated.
const upb_handlers *upb_pbdecoder_gethandlers(const upb_handlers *dest,
                                              bool allowjit,
                                              const void *owner) {
  UPB_UNUSED(allowjit);
  assert(upb_handlers_isfrozen(dest));

  decoderplan *p = malloc(sizeof(*p));
  if (p == NULL) return NULL;  // Out of memory; nothing to clean up yet.
  p->dest_handlers = dest;
  upb_handlers_ref(dest, p);
#ifdef UPB_USE_JIT_X64
  p->jit_code = NULL;
  if (allowjit) upb_decoderplan_makejit(p);
#endif

  upb_handlers *h = upb_handlers_new(
      UPB_BYTESTREAM, &upb_pbdecoder_frametype, owner);
  upb_handlers_setstartstr(h, UPB_BYTESTREAM_BYTES, start, NULL, NULL);
  upb_handlers_setstring(h, UPB_BYTESTREAM_BYTES, decode, p, freeplan);
  upb_handlers_setendstr(h, UPB_BYTESTREAM_BYTES, end, NULL, NULL);
  return h;
}