Sync from Google-internal development.

11 years ago · d493500abc
parent 47b5e0968a
commit d493500abc
12 changed files with 451 additions and 150 deletions
--- a/tests/pb/test_decoder.cc
+++ b/tests/pb/test_decoder.cc
@ -612,6 +612,9 @@ void run_decoder(const string& proto, const string* expected_output) {
          if (ok) {
            fprintf(stderr, "Didn't expect ok result, but got output: '%s'\n",
                    output.c_str());
+          } else if (filter_hash) {
+            fprintf(stderr, "Failed as we expected, with message: %s\n",
+                    status.error_message());
          }
          ASSERT(!ok);
        }
@ -838,6 +841,10 @@ void test_invalid() {
  // Field number is 0.
  assert_does_not_parse(
      cat( tag(0, UPB_WIRE_TYPE_DELIMITED), varint(0) ));
+  // The previous test alone did not catch this particular pattern which could
+  // corrupt the internal state.
+  assert_does_not_parse(
+      cat( tag(0, UPB_WIRE_TYPE_64BIT), uint64(0) ));

  // Field number is too large.
  assert_does_not_parse(
@ -928,6 +935,12 @@ void test_valid() {
      submsg(12345, string("                ")),
      "<\n>\n");

+  // This triggered a previous bug in the decoder.
+  assert_successful_parse(
+      cat( tag(UPB_DESCRIPTOR_TYPE_SFIXED32, UPB_WIRE_TYPE_VARINT),
+           varint(0) ),
+      "<\n>\n");
+
  assert_successful_parse(
      cat(
        submsg(UPB_DESCRIPTOR_TYPE_MESSAGE,
--- a/tests/test_table.cc
+++ b/tests/test_table.cc
@ -284,6 +284,26 @@ int32_t *get_contiguous_keys(int32_t num) {
  return buf;
 }

+// Regression test for removal from a compacted table: once every key has been
+// removed again, iterating over the table must not visit any entry.
+void test_delete() {
+  const uintptr_t keys[] = {0, 2, 4};
+  const size_t num_keys = sizeof(keys) / sizeof(keys[0]);

+  upb_inttable table;
+  upb_inttable_init(&table, UPB_CTYPE_BOOL);
+  for (size_t k = 0; k < num_keys; k++) {
+    upb_inttable_insert(&table, keys[k], upb_value_bool(true));
+  }
+  upb_inttable_compact(&table);
+  for (size_t k = 0; k < num_keys; k++) {
+    upb_inttable_remove(&table, keys[k], NULL);
+  }

+  // Any iteration step at all means a removed entry is still visible.
+  upb_inttable_iter it;
+  upb_inttable_begin(&it, &table);
+  while (!upb_inttable_done(&it)) {
+    ASSERT(false);
+    upb_inttable_next(&it);
+  }

+  upb_inttable_uninit(&table);
+}
+
 extern "C" {

 int run_tests(int argc, char *argv[]) {
@ -336,6 +356,9 @@ int run_tests(int argc, char *argv[]) {
  }
  test_inttable(keys4, 64, "Table size: 64, keys: 1-32 and 10133-10164 ====\n");
  delete[] keys4;
+
+  test_delete();
+
  return 0;
 }

--- a/upb/bindings/lua/upb/descriptor.c
+++ b/upb/bindings/lua/upb/descriptor.c
@ -1,20 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Google Inc.  See LICENSE for details.
- * Author: Josh Haberman <jhaberman@gmail.com>
- *
- * A Lua extension for upb/descriptor.
- */
-
-#include "upb/bindings/lua/upb.h"
-
-static const struct luaL_Reg toplevel_m[] = {
-  {NULL, NULL}
-};
-
-int luaopen_upb_descriptor(lua_State *L) {
-  lupb_newlib(L, "upb.descriptor", toplevel_m);
-
-  return 1;  // Return package table.
-}
--- a/upb/pb/compile_decoder.c
+++ b/upb/pb/compile_decoder.c
@ -509,9 +509,20 @@ static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
  return selector;
 }

+// Rewrites an existing primary dispatch table entry so that it carries a
+// different alternate wire type.  The offset and the primary wire type are
+// carried over unchanged.  Called when a secondary dispatch table entry is
+// being inserted for an alternate wire type.
+static uint64_t repack(uint64_t dispatch, int new_wt2) {
+  uint64_t offset;
+  uint8_t primary_wt, prev_wt2;
+  upb_pbdecoder_unpackdispatch(dispatch, &offset, &primary_wt, &prev_wt2);
+  // The alternate wire type slot must still be unset at this point.
+  assert(prev_wt2 == NO_WIRE_TYPE);
+  return upb_pbdecoder_packdispatch(offset, primary_wt, new_wt2);
+}
+
 // Marks the current bytecode position as the dispatch target for this message,
 // field, and wire type.
-//
 static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
                           const upb_fielddef *f, int wire_type) {
  // Offset is relative to msg base.
@ -521,12 +532,12 @@ static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
  upb_value v;
  if (upb_inttable_remove(d, fn, &v)) {
    // TODO: prioritize based on packed setting in .proto file.
-    uint64_t oldval = upb_value_getuint64(v);
-    assert(((oldval >> 8) & 0xff) == 0);  // wt2 should not be set yet.
-    upb_inttable_insert(d, fn, upb_value_uint64(oldval | (wire_type << 8)));
+    uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
+    upb_inttable_insert(d, fn, upb_value_uint64(repacked));
    upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
  } else {
-    upb_inttable_insert(d, fn, upb_value_uint64((ofs << 16) | wire_type));
+    uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
+    upb_inttable_insert(d, fn, upb_value_uint64(val));
  }
 }

--- a/upb/pb/compile_decoder_x64.c
+++ b/upb/pb/compile_decoder_x64.c
@ -10,11 +10,48 @@
 #include <dlfcn.h>
 #include <stdio.h>
 #include <sys/mman.h>
+#include <unistd.h>
 #include "upb/pb/decoder.h"
 #include "upb/pb/decoder.int.h"
 #include "upb/pb/varint.int.h"
 #include "upb/shim/shim.h"

+// To debug the JIT:
+//
+// 1. Uncomment:
+// #define UPB_JIT_LOAD_SO
+//
+// Note: this mode requires that we can shell out to gcc.
+//
+// 2. Run the test once locally.  This will load the JIT code by building a
+//    .so (/tmp/upb-jit-code.so) and using dlopen, so more of the tooling will
+//    work properly (like GDB).
+//
+// IF YOU ALSO WANT AUTOMATIC JIT DEBUG OUTPUT:
+//
+// 3. Run: upb/pb/make-gdb-script.rb > script.gdb.  This reads
+//    /tmp/upb-jit-code.so as input and generates a GDB script that is specific
+//    to this jit code.
+//
+// 4. Run: gdb --command=script.gdb --args path/to/test
+//    This will drop you to a GDB prompt which you can now use normally.
+//    But when you run the test it will print a message to stdout every time
+//    the JIT executes assembly for a particular bytecode.  Sample output:
+//
+//    X.enterjit bytes=18
+//    buf_ofs=1 data_rem=17 delim_rem=-2 X.0x6.OP_PARSE_DOUBLE
+//    buf_ofs=9 data_rem=9 delim_rem=-10 X.0x7.OP_CHECKDELIM
+//    buf_ofs=9 data_rem=9 delim_rem=-10 X.0x8.OP_TAG1
+//    X.0x3.dispatch.DecoderTest
+//    X.parse_unknown
+//    X.0x3.dispatch.DecoderTest
+//    X.decode_unknown_tag_fallback
+//    X.exitjit
+//
+//    This output should roughly correspond to the output that the bytecode
+//    interpreter emits when compiled with UPB_DUMP_BYTECODE (modulo some
+//    extra JIT-specific output).
+
 // These defines are necessary for DynASM codegen.
 // See dynasm/dasm_proto.h for more info.
 #define Dst_DECL jitcompiler *jc
@ -31,8 +68,6 @@
 #define MAP_ANONYMOUS MAP_ANON
 #endif

-#define DECODE_EOF -3
-
 typedef struct {
  mgroup *group;
  uint32_t *pc;
@ -40,18 +75,39 @@ typedef struct {
  // This pointer is allocated by dasm_init() and freed by dasm_free().
  struct dasm_State *dynasm;

-  // Maps arbitrary void* -> pclabel.
-  upb_inttable pclabels;
-  upb_inttable pcdefined;
+  // Maps some key (an arbitrary void*) to a pclabel.
+  //
+  // The pclabel represents a location in the generated code -- DynASM exposes
+  // a pclabel -> (machine code offset) lookup function.
+  //
+  // The key can be anything.  There are two main kinds of keys:
+  //   - bytecode location -- the void* points to the bytecode instruction
+  //     itself.  We can then use this to generate jumps to this instruction.
+  //   - other object (like dispatch table).  We use these to represent parts
+  //     of the generated code that do not exactly correspond to a bytecode
+  //     instruction.
+  upb_inttable jmptargets;

-  // For marking labels that should go into the generated code.
-  // Maps pclabel -> char* label (string is owned by the table).
-  upb_inttable asmlabels;
+#ifndef NDEBUG
+  // Like jmptargets, but members are present in the table when they have had
+  // define_jmptarget() (as opposed to jmptarget) called.  Used to verify that
+  // define_jmptarget() is called exactly once for every target.
+  // The value is ignored.
+  upb_inttable jmpdefined;

  // For checking that two asmlabels aren't defined for the same byte.
  int lastlabelofs;
+#endif
+
+#ifdef UPB_JIT_LOAD_SO
+  // For marking labels that should go into the generated code.
+  // Maps pclabel -> char* label (string is owned by the table).
+  upb_inttable asmlabels;
+#endif

  // The total number of pclabels currently defined.
+  // Note that this counts both jmptargets and asmlabels, which both use
+  // pclabels but for different purposes.
  uint32_t pclabel_count;

  // Used by DynASM to store globals.
@ -59,10 +115,16 @@ typedef struct {
 } jitcompiler;

 // Functions called by codegen.
-static int pclabel(jitcompiler *jc, const void *here);
-static int define_pclabel(jitcompiler *jc, const void *here);
+static int jmptarget(jitcompiler *jc, const void *key);
+static int define_jmptarget(jitcompiler *jc, const void *key);
 static void asmlabel(jitcompiler *jc, const char *fmt, ...);
 static int pcofs(jitcompiler* jc);
+static int alloc_pclabel(jitcompiler *jc);
+
+#ifdef UPB_JIT_LOAD_SO
+static char *upb_vasprintf(const char *fmt, va_list ap);
+static char *upb_asprintf(const char *fmt, ...);
+#endif

 #include "dynasm/dasm_proto.h"
 #include "dynasm/dasm_x86.h"
@ -72,10 +134,14 @@ static jitcompiler *newjitcompiler(mgroup *group) {
  jitcompiler *jc = malloc(sizeof(jitcompiler));
  jc->group = group;
  jc->pclabel_count = 0;
+  upb_inttable_init(&jc->jmptargets, UPB_CTYPE_UINT32);
+#ifndef NDEBUG
  jc->lastlabelofs = -1;
-  upb_inttable_init(&jc->pclabels, UPB_CTYPE_UINT32);
-  upb_inttable_init(&jc->pcdefined, UPB_CTYPE_BOOL);
+  upb_inttable_init(&jc->jmpdefined, UPB_CTYPE_BOOL);
+#endif
+#ifdef UPB_JIT_LOAD_SO
  upb_inttable_init(&jc->asmlabels, UPB_CTYPE_PTR);
+#endif
  jc->globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*jc->globals));

  dasm_init(jc, 1);
@ -86,37 +152,106 @@ static jitcompiler *newjitcompiler(mgroup *group) {
 }

 static void freejitcompiler(jitcompiler *jc) {
+#ifdef UPB_JIT_LOAD_SO
  upb_inttable_iter i;
  upb_inttable_begin(&i, &jc->asmlabels);
  for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    free(upb_value_getptr(upb_inttable_iter_value(&i)));
  }
  upb_inttable_uninit(&jc->asmlabels);
-  upb_inttable_uninit(&jc->pclabels);
-  upb_inttable_uninit(&jc->pcdefined);
+#endif
+#ifndef NDEBUG
+  upb_inttable_uninit(&jc->jmpdefined);
+#endif
+  upb_inttable_uninit(&jc->jmptargets);
  dasm_free(jc);
  free(jc->globals);
  free(jc);
 }

-// Returns a pclabel associated with the given arbitrary pointer.
-static int pclabel(jitcompiler *jc, const void *here) {
+#ifdef UPB_JIT_LOAD_SO
+
+// Like sprintf except allocates the string, which is returned and owned by the
+// caller (release it with free()).
+//
+// Like the GNU extension vasprintf(), except we abort on error (since this is
+// only for debugging).  The caller remains responsible for calling va_end()
+// on `args`.
+static char *upb_vasprintf(const char *fmt, va_list args) {
+  // Run once to get the length of the string.  This pass works on a copy of
+  // the argument list because `args` is traversed again below.
+  va_list args_copy;
+  va_copy(args_copy, args);
+  int len = vsnprintf(NULL, 0, fmt, args_copy);
+  va_end(args_copy);
+  // vsnprintf() signals failure with a negative value; without this check we
+  // would go on to size the buffer from a bogus length.
+  if (len < 0) abort();

+  char *ret = malloc((size_t)len + 1);  // + 1 for NUL terminator.
+  if (!ret) abort();
+  int written = vsnprintf(ret, (size_t)len + 1, fmt, args);
+  UPB_ASSERT_VAR(written, written == len);

+  return ret;
+}
+
+// Variadic front end for upb_vasprintf(): formats `fmt` with the trailing
+// arguments into a newly allocated string that the caller owns.
+static char *upb_asprintf(const char *fmt, ...) {
+  va_list ap;
+  char *str;

+  va_start(ap, fmt);
+  str = upb_vasprintf(fmt, ap);
+  va_end(ap);
+  return str;
+}
+
+#endif
+
+// Hands out the next unused pclabel index, bumps pclabel_count, and passes the
+// new count to dasm_growpc().  Returns the index that was handed out.
+static int alloc_pclabel(jitcompiler *jc) {
+  const int label = jc->pclabel_count;
+  jc->pclabel_count++;
+  dasm_growpc(jc, jc->pclabel_count);
+  return label;
+}
+
+static bool try_getjmptarget(jitcompiler *jc, const void *key, int *pclabel) {
  upb_value v;
-  bool found = upb_inttable_lookupptr(&jc->pclabels, here, &v);
-  if (!found) {
-    upb_value_setuint32(&v, jc->pclabel_count++);
-    dasm_growpc(jc, jc->pclabel_count);
-    upb_inttable_insertptr(&jc->pclabels, here, v);
+  if (upb_inttable_lookupptr(&jc->jmptargets, key, &v)) {
+    *pclabel = upb_value_getuint32(v);
+    return true;
+  } else {
+    return false;
+  }
+}
+
+// Looks up the pclabel already associated with `key`.  Unlike jmptarget(),
+// this never allocates a pclabel: define_jmptarget() must have been called for
+// this key beforehand (asserted in debug builds).
+static int getjmptarget(jitcompiler *jc, const void *key) {
+  assert(upb_inttable_lookupptr(&jc->jmpdefined, key, NULL));
+  int label;
+  bool found = try_getjmptarget(jc, key, &label);
+  UPB_ASSERT_VAR(found, found);
+  return label;
+}
+
+// Returns a pclabel that serves as a jmp target for the given bytecode pointer.
+// This should only be called for code that is jumping to the target; code
+// defining the target should use define_jmptarget().
+//
+// Creates/allocates a pclabel for this target if one does not exist already.
+static int jmptarget(jitcompiler *jc, const void *key) {
+  int pclabel;
+  if (!try_getjmptarget(jc, key, &pclabel)) {
+    pclabel = alloc_pclabel(jc);
+    upb_inttable_insertptr(&jc->jmptargets, key, upb_value_uint32(pclabel));
  }
-  return upb_value_getuint32(v);
+  return pclabel;
 }

-// Defines a pclabel associated with the given arbitrary pointer.
-// May only be called once (to avoid redefining the pclabel).
-static int define_pclabel(jitcompiler *jc, const void *here) {
-  // Will assert-fail if it already exists.
-  upb_inttable_insertptr(&jc->pcdefined, here, upb_value_bool(true));
-  return pclabel(jc, here);
+// Defines a pclabel associated with the given bytecode location.
+// Must be called exactly once, by the code that is generating the machine
+// code for this bytecode, before bytecode generation is complete.
+//
+// In debug builds the key is also recorded in jmpdefined, as a sanity check
+// that the label is defined exactly once.
+static int define_jmptarget(jitcompiler *jc, const void *key) {
+#ifndef NDEBUG
+  upb_inttable_insertptr(&jc->jmpdefined, key, upb_value_bool(true));
+#endif
+  return jmptarget(jc, key);
 }

 // Returns a bytecode pc offset relative to the beginning of the group's code.
@ -126,27 +261,35 @@ static int pcofs(jitcompiler *jc) {

 static void upb_reg_jit_gdb(jitcompiler *jc);

-static int getpclabel(jitcompiler *jc, const void *target) {
-  return dasm_getpclabel(jc, pclabel(jc, target));
+// Returns a machine code offset corresponding to the given key.
+// Requires that this key was defined with define_jmptarget.
+static int machine_code_ofs(jitcompiler *jc, const void *key) {
+  int pclabel = getjmptarget(jc, key);
+  // Despite its name, this function takes a pclabel and returns the
+  // corresponding machine code offset.
+  return dasm_getpclabel(jc, pclabel);
 }

-// Given a pcofs relative to method, returns the machine code offset for it
-// (relative to the beginning of the machine code).
-int nativeofs(jitcompiler *jc, const upb_pbdecodermethod *method, int pcofs) {
-  void *target = jc->group->bytecode + method->code_base.ofs + pcofs;
-  return getpclabel(jc, target);
+// Returns a machine code offset corresponding to the given method-relative
+// bytecode offset.  Note that the bytecode offset is relative to the given
+// method, but the returned machine code offset is relative to the beginning of
+// *all* the machine code.
+static int machine_code_ofs2(jitcompiler *jc, const upb_pbdecodermethod *method,
+                             int pcofs) {
+  void *bc_target = jc->group->bytecode + method->code_base.ofs + pcofs;
+  return machine_code_ofs(jc, bc_target);
 }

 // Given a pcofs relative to this method's base, returns a machine code offset
-// relative to pclabel(dispatch->array) (which is used in jitdispatch as the
+// relative to jmptarget(dispatch->array) (which is used in jitdispatch as the
 // machine code base for dispatch table lookups).
 uint32_t dispatchofs(jitcompiler *jc, const upb_pbdecodermethod *method,
                     int pcofs) {
-  int ofs1 = getpclabel(jc, method->dispatch.array);
-  int ofs2 = nativeofs(jc, method, pcofs);
-  assert(ofs1 > 0);
-  assert(ofs2 > 0);
-  int ret = ofs2 - ofs1;
+  int mc_base = machine_code_ofs(jc, method->dispatch.array);
+  int mc_target = machine_code_ofs2(jc, method, pcofs);
+  assert(mc_base > 0);
+  assert(mc_target > 0);
+  int ret = mc_target - mc_base;
  assert(ret > 0);
  return ret;
 }
@ -160,31 +303,42 @@ static void patchdispatch(jitcompiler *jc) {
    method->is_native_ = true;

    upb_inttable *dispatch = &method->dispatch;
+
+    // Remove DISPATCH_ENDMSG -- only the bytecode interpreter needs it.
+    // And leaving it around will cause us to find field 0 improperly.
+    upb_inttable_remove(dispatch, DISPATCH_ENDMSG, NULL);
+
    upb_inttable_iter i2;
    upb_inttable_begin(&i2, dispatch);
    for (; !upb_inttable_done(&i2); upb_inttable_next(&i2)) {
      uintptr_t key = upb_inttable_iter_key(&i2);
-      if (key == 0) continue;
      uint64_t val = upb_value_getuint64(upb_inttable_iter_value(&i2));
      uint64_t newval;
      if (key <= UPB_MAX_FIELDNUMBER) {
        // Primary slot.
-        uint64_t oldofs = val >> 16;
-        uint64_t newofs = dispatchofs(jc, method, oldofs);
-        newval = (val & 0xffff) | (newofs << 16);
+        uint64_t ofs;
+        uint8_t wt1;
+        uint8_t wt2;
+        upb_pbdecoder_unpackdispatch(val, &ofs, &wt1, &wt2);
+
+        // Update offset and repack.
+        ofs = dispatchofs(jc, method, ofs);
+        newval = upb_pbdecoder_packdispatch(ofs, wt1, wt2);
        assert((int64_t)newval > 0);
      } else {
        // Secondary slot.  Since we have 64 bits for the value, we use an
        // absolute offset.
-        newval = (uint64_t)(jc->group->jit_code + nativeofs(jc, method, val));
+        int mcofs = machine_code_ofs2(jc, method, val);
+        newval = (uint64_t)(jc->group->jit_code + mcofs);
      }
      bool ok = upb_inttable_replace(dispatch, key, upb_value_uint64(newval));
      UPB_ASSERT_VAR(ok, ok);
    }

-    // Set this only *after* we have patched the offsets (nativeofs() above
-    // reads this).
-    method->code_base.ptr = jc->group->jit_code + getpclabel(jc, method);
+    // Update entry point for this method to point at mc base instead of bc
+    // base.  Set this only *after* we have patched the offsets
+    // (machine_code_ofs2() uses this).
+    method->code_base.ptr = jc->group->jit_code + machine_code_ofs(jc, method);

    upb_byteshandler *h = &method->input_handler_;
    upb_byteshandler_setstartstr(h, upb_pbdecoder_startjit, NULL);
@ -193,10 +347,8 @@ static void patchdispatch(jitcompiler *jc) {
  }
 }

-// Define for JIT debugging.
-//#define UPB_JIT_LOAD_SO
-
 #ifdef UPB_JIT_LOAD_SO
+
 static void load_so(jitcompiler *jc) {
  // Dump to a .so file in /tmp and load that, so all the tooling works right
  // (for example, debuggers and profilers will see symbol names for the JIT-ted
@ -204,10 +356,15 @@ static void load_so(jitcompiler *jc) {
  // interface is only used/understood by GDB.  Hopefully a standard will
  // develop for registering JIT-ted code that all tools will recognize,
  // rendering this obsolete.
-  //
-  // Requires that gcc is available from the command-line.

-  // Convert all asm labels from pclabel offsets to machine code offsets.
+  // jc->asmlabels maps:
+  //   pclabel -> char* label
+  //
+  // Use this to build mclabels, which maps:
+  //   machine code offset -> char* label
+  //
+  // Then we can use mclabels to emit the labels as we iterate over the bytes we
+  // are outputting.
  upb_inttable_iter i;
  upb_inttable mclabels;
  upb_inttable_init(&mclabels, UPB_CTYPE_PTR);
@ -218,8 +375,16 @@ static void load_so(jitcompiler *jc) {
                        upb_inttable_iter_value(&i));
  }

-  FILE *f = fopen("/tmp/upb-jit-code.s", "w");
-  if (f) {
+  // We write a .s file in text format, as input to the assembler.
+  // Then we run gcc to turn it into a .so file.
+  //
+  // The last "XXXXXX" will be replaced with something randomly generated by
+  // mkstemp().  We don't add ".s" to this filename because it makes the string
+  // processing for mkstemp() and system() more complicated.
+  char s_filename[] = "/tmp/upb-jit-codeXXXXXX";
+  int fd = mkstemp(s_filename);
+  FILE *f;
+  if (fd >= 0 && (f = fdopen(fd, "wb")) != NULL) {
    uint8_t *jit_code = (uint8_t*)jc->group->jit_code;
    fputs("  .text\n\n", f);
    size_t linelen = 0;
@ -242,17 +407,33 @@ static void load_so(jitcompiler *jc) {
    fputs("\n", f);
    fclose(f);
  } else {
-    fprintf(stderr, "Couldn't open /tmp/upb-jit-code.s for writing\n");
+    fprintf(stderr, "Error opening tmp file for JIT debug output.\n");
    abort();
  }

-  // TODO: racy
-  if (system("gcc -shared -o /tmp/upb-jit-code.so /tmp/upb-jit-code.s") != 0) {
-    fprintf(stderr, "Error compiling upb-jit-code.s\n");
+  // This is exploitable if you have an adversary on your machine who can write
+  // to this tmp directory.  But this is just for debugging so we don't worry
+  // too much about that.  It shouldn't be prone to races against concurrent
+  // (non-adversarial) upb JITs because we used mkstemp().
+  char *cmd = upb_asprintf("gcc -shared -o %s.so -x assembler %s", s_filename,
+                           s_filename);
+  if (system(cmd) != 0) {
+    fprintf(stderr, "Error compiling %s\n", s_filename);
    abort();
  }
+  free(cmd);
+
+  char *so_filename = upb_asprintf("%s.so", s_filename);

-  jc->group->dl = dlopen("/tmp/upb-jit-code.so", RTLD_LAZY);
+  // Some convenience symlinks.
+  // This is racy, but just for convenience.
+  unlink("/tmp/upb-jit-code.so");
+  unlink("/tmp/upb-jit-code.s");
+  symlink(s_filename, "/tmp/upb-jit-code.s");
+  symlink(so_filename, "/tmp/upb-jit-code.so");
+
+  jc->group->dl = dlopen(so_filename, RTLD_LAZY);
+  free(so_filename);
  if (!jc->group->dl) {
    fprintf(stderr, "Couldn't dlopen(): %s\n", dlerror());
    abort();
@ -267,6 +448,7 @@ static void load_so(jitcompiler *jc) {

  upb_inttable_uninit(&mclabels);
 }
+
 #endif

 void upb_pbdecoder_jit(mgroup *group) {
--- a/upb/pb/compile_decoder_x64.dasc
+++ b/upb/pb/compile_decoder_x64.dasc
@ -133,33 +133,44 @@
 |  add     PTR, 1
 |.endmacro

+#define DECODE_EOF -3
+
+// Returns the handler registered for selector `sel` in `h`, or NULL when `h`
+// itself is NULL.
+static upb_func *gethandler(const upb_handlers *h, upb_selector_t sel) {
+  if (!h) return NULL;
+  return upb_handlers_gethandler(h, sel);
+}
+
+// Defines an "assembly label" for the current code generation offset.
+// This label exists *purely* for debugging purposes: it is emitted into
+// the .so, and printed as part of JIT debugging output when UPB_JIT_LOAD_SO is
+// defined.
+//
+// We would define this in the .c file except that it conditionally defines a
+// pclabel.
 static void asmlabel(jitcompiler *jc, const char *fmt, ...) {
+#ifndef NDEBUG
  int ofs = jc->dynasm->section->ofs;
  assert(ofs != jc->lastlabelofs);
  jc->lastlabelofs = ofs;
+#endif
+
+#ifndef UPB_JIT_LOAD_SO
+  UPB_UNUSED(jc);
+  UPB_UNUSED(fmt);
+#else
  va_list args;
  va_start(args, fmt);
-
-  // Run once to get the length of the string.
-  va_list args_copy;
-  va_copy(args_copy, args);
-  int len = vsnprintf(NULL, 0, fmt, args_copy);
-  va_end(args_copy);
-
-  char *str = malloc(len + 1);  // + 1 for NULL terminator.
-  if (!str) exit(1);
-  int written = vsnprintf(str, len + 1, fmt, args);
+  char *str = upb_vasprintf(fmt, args);
  va_end(args);
-  UPB_ASSERT_VAR(written, written == len);

-  uint32_t label = jc->pclabel_count++;
-  dasm_growpc(jc, jc->pclabel_count);
-  |=>label:
-  upb_inttable_insert(&jc->asmlabels, label, upb_value_ptr(str));
-}
-
-static upb_func *gethandler(const upb_handlers *h, upb_selector_t sel) {
-  return h ? upb_handlers_gethandler(h, sel) : NULL;
+  int pclabel = alloc_pclabel(jc);
+  // Normally we would prefer to allocate this inline with the codegen,
+  // ie.
+  //   |=>asmlabel(...)
+  // But since we do this conditionally, only when UPB_JIT_LOAD_SO is defined,
+  // we do it here instead.
+  |=>pclabel:
+  upb_inttable_insert(&jc->asmlabels, pclabel, upb_value_ptr(str));
+#endif
 }

 // Should only be called when the associated handler is known to exist.
@ -696,7 +707,7 @@ static void jitdispatch(jitcompiler *jc,
  // this characteristic.
  bool has_multi_wiretype = has_hash_entries;

-  |=>define_pclabel(jc, &method->dispatch):
+  |=>define_jmptarget(jc, &method->dispatch):
  |1:
  // Decode the field tag.
  |  mov     aword DECODER->checkpoint, PTR
@ -751,8 +762,17 @@ static void jitdispatch(jitcompiler *jc,
    |  jne  >5
  }
  |  shr  rax, 16
+  |
+  |  // Load the machine code address from the table entry.
+  |  // The table entry is relative to the dispatch->array jmptarget
+  |  // (patchdispatch() took care of this) which is the same as
+  |  // local label "4".  The "lea" is really just trying to do
+  |  //    lea  rax, [>4 + rax]
+  |  //
+  |  // But we can't write that directly for some reason, so we use
+  |  // rdx as a temporary.
  |  lea  rdx, [>4]
-  |=>define_pclabel(jc, dispatch->array):
+  |=>define_jmptarget(jc, dispatch->array):
  |4:
  |  add  rax, rdx
  |  ret
@ -812,7 +832,7 @@ static void jittag(jitcompiler *jc, uint64_t tag, int n, int ofs,
  |  cmp   eax, DECODE_MISMATCH
  |  je    >3
  |  cmp   eax, DECODE_EOF
-  |  je     =>pclabel(jc, delimend)
+  |  je     =>jmptarget(jc, delimend)
  |  jmp   >5

  |1:
@ -844,12 +864,12 @@ static void jittag(jitcompiler *jc, uint64_t tag, int n, int ofs,
  |  je    >4
  |3:
  if (ofs == 0) {
-    |  call   =>pclabel(jc, &method->dispatch)
+    |  call   =>jmptarget(jc, &method->dispatch)
    |  test   rax, rax
-    |  jz     =>pclabel(jc, delimend)
+    |  jz     =>jmptarget(jc, delimend)
    |  jmp    rax
  } else {
-    |  jmp    =>pclabel(jc, jc->pc + ofs)
+    |  jmp    =>jmptarget(jc, jc->pc + ofs)
  }
  |4:
  |  add    PTR, n
@ -874,7 +894,7 @@ static void jitbytecode(jitcompiler *jc) {
      // Skipped for SETDISPATCH because it should point at the function
      // prologue, not the dispatch function that is emitted first.
      // TODO: optimize this to only define pclabels that are actually used.
-      |=>define_pclabel(jc, jc->pc):
+      |=>define_jmptarget(jc, jc->pc):
    }

    jc->pc++;
@ -936,8 +956,8 @@ static void jitbytecode(jitcompiler *jc) {

      // Emit function prologue for new method.
      asmlabel(jc, "0x%lx.parse.%s", pcofs(jc), msgname);
-      |=>define_pclabel(jc, op_pc):
-      |=>define_pclabel(jc, method):
+      |=>define_jmptarget(jc, op_pc):
+      |=>define_jmptarget(jc, method):
      |  sub   rsp, 8

      break;
@ -1082,13 +1102,13 @@ static void jitbytecode(jitcompiler *jc) {
      break;
    case OP_CHECKDELIM:
      |  cmp  DELIMEND, PTR
-      |  je   =>pclabel(jc, jc->pc + longofs)
+      |  je   =>jmptarget(jc, jc->pc + longofs)
      break;
    case OP_CALL:
-      |  call =>pclabel(jc, jc->pc + longofs)
+      |  call =>jmptarget(jc, jc->pc + longofs)
      break;
    case OP_BRANCH:
-      |  jmp  =>pclabel(jc, jc->pc + longofs);
+      |  jmp  =>jmptarget(jc, jc->pc + longofs);
      break;
    case OP_RET:
      |9:
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@ -567,7 +567,8 @@ static int32_t dispatch(upb_pbdecoder *d) {
  // Lookup tag.  Because of packed/non-packed compatibility, we have to
  // check the wire type against two possibilities.
  upb_value val;
-  if (upb_inttable_lookup32(dispatch, fieldnum, &val)) {
+  if (fieldnum != DISPATCH_ENDMSG &&
+      upb_inttable_lookup32(dispatch, fieldnum, &val)) {
    uint64_t v = upb_value_getuint64(val);
    if (wire_type == (v & 0xff)) {
      d->pc = d->top->base + (v >> 16);
--- a/upb/pb/decoder.h
+++ b/upb/pb/decoder.h
@ -146,19 +146,9 @@ UPB_DEFINE_STRUCT(upb_pbdecodermethod, upb_refcounted,
  // The destination handlers this method is bound to.  We own a ref.
  const upb_handlers *dest_handlers_;

-  // The dispatch table layout is:
-  //   [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ]
-  //
-  // If wt1 matches, jump to the 48-bit offset.  If wt2 matches, lookup
-  // (UPB_MAX_FIELDNUMBER + fieldnum) and jump there.
-  //
-  // We need two wire types because of packed/non-packed compatibility.  A
-  // primitive repeated field can use either wire type and be valid.  While we
-  // could key the table on fieldnum+wiretype, the table would be 8x sparser.
-  //
-  // Storing two wire types in the primary value allows us to quickly rule out
-  // the second wire type without needing to do a separate lookup (this case is
-  // less common than an unknown field).
+  // Dispatch table -- used by both bytecode decoder and JIT when encountering a
+  // field number that wasn't the one we were expecting to see.  See
+  // decoder.int.h for the layout of this table.
  upb_inttable dispatch;
 ));

--- a/upb/pb/decoder.int.h
+++ b/upb/pb/decoder.int.h
@ -147,6 +147,35 @@ void upb_pbdecoder_freejit(mgroup *group);
 // RET) for branching to when we find an appropriate ENDGROUP tag.
 #define DISPATCH_ENDMSG 0

+// It's important to use this invalid wire type instead of 0 (which is a valid
+// wire type).
+#define NO_WIRE_TYPE 0xff
+
+// The dispatch table layout is:
+//   [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ]
+//
+// If wt1 matches, jump to the 48-bit offset.  If wt2 matches, lookup
+// (UPB_MAX_FIELDNUMBER + fieldnum) and jump there.
+//
+// We need two wire types because of packed/non-packed compatibility.  A
+// primitive repeated field can use either wire type and be valid.  While we
+// could key the table on fieldnum+wiretype, the table would be 8x sparser.
+//
+// Storing two wire types in the primary value allows us to quickly rule out
+// the second wire type without needing to do a separate lookup (this case is
+// less common than an unknown field).
+// Builds a dispatch table value: wt1 in bits 0-7, wt2 in bits 8-15, and the
+// offset in the bits above those.
+UPB_INLINE uint64_t upb_pbdecoder_packdispatch(uint64_t ofs, uint8_t wt1,
+                                               uint8_t wt2) {
+  const uint64_t wire_types = ((uint64_t)wt2 << 8) | (uint64_t)wt1;
+  return (ofs << 16) | wire_types;
+}
+
+// Splits a dispatch table value into its parts: wt1 from bits 0-7, wt2 from
+// bits 8-15, and the offset from the remaining upper bits.
+UPB_INLINE void upb_pbdecoder_unpackdispatch(uint64_t dispatch, uint64_t *ofs,
+                                             uint8_t *wt1, uint8_t *wt2) {
+  *wt1 = (uint8_t)(dispatch & 0xff);
+  *wt2 = (uint8_t)((dispatch >> 8) & 0xff);
+  *ofs = dispatch >> 16;
+}
+
 // All of the functions in decoder.c that return int32_t return values according
 // to the following scheme:
 //   1. negative values indicate a return code from the following list.
--- a/upb/pb/make-gdb-script.rb
+++ b/upb/pb/make-gdb-script.rb
@ -0,0 +1,36 @@
+#!/usr/bin/ruby
+
+puts "set width 0
+set height 0
+set verbose off\n\n"
+
+IO.popen("nm -S /tmp/upb-jit-code.so").each_line { |line|
+  # Input lines look like this:
+  #   000000000000575a T X.0x10.OP_CHECKDELIM
+  #
+  # For each one we want to emit a command that looks like:
+  #   b X.0x10.OP_CHECKDELIM
+  #   commands
+  #     silent
+  #     printf "buf_ofs=%d data_rem=%d delim_rem=%d X.0x10.OP_CHECKDELIM\n", $rbx - (long)((upb_pbdecoder*)($r15))->buf, $r12 - $rbx, $rbp - $rbx
+  #     continue
+  #   end
+
+  parts = line.split
+  next if parts[1] != "T"
+  sym = parts[2]
+  next if sym !~ /X\./;
+  if sym =~ /OP_/ then
+    printcmd = "printf \"buf_ofs=%d data_rem=%d delim_rem=%d #{sym}\\n\", $rbx - (long)((upb_pbdecoder*)($r15))->buf, $r12 - $rbx, $rbp - $rbx"
+  elsif sym =~ /enterjit/ then
+    printcmd = "printf \"#{sym} bytes=%d\\n\", $rcx"
+  else
+    printcmd = "printf \"#{sym}\\n\""
+  end
+  puts "b #{sym}
+commands
+  silent
+  #{printcmd}
+  continue
+end\n\n"
+}
--- a/upb/table.c
+++ b/upb/table.c
@ -53,6 +53,11 @@ typedef bool eqlfunc_t(upb_tabkey k1, upb_tabkey k2);

 /* Base table (shared code) ***************************************************/

+// Returns t->entries with its const qualifier cast away, for the places that
+// need to write to (or free) the entries.
+static upb_tabent *mutable_entries(upb_table *t) {
+  upb_tabent *entries = (upb_tabent*)t->entries;
+  return entries;
+}
+
 static bool isfull(upb_table *t) {
  return (double)(t->count + 1) / upb_table_size(t) > MAX_LOAD;
 }
@ -66,17 +71,17 @@ static bool init(upb_table *t, upb_ctype_t ctype, uint8_t size_lg2) {
  if (bytes > 0) {
    t->entries = malloc(bytes);
    if (!t->entries) return false;
-    memset((void*)t->entries, 0, bytes);
+    memset(mutable_entries(t), 0, bytes);
  } else {
    t->entries = NULL;
  }
  return true;
 }

-static void uninit(upb_table *t) { free((void*)t->entries); }
+static void uninit(upb_table *t) { free(mutable_entries(t)); }

 static upb_tabent *emptyent(upb_table *t) {
-  upb_tabent *e = (upb_tabent*)t->entries + upb_table_size(t);
+  upb_tabent *e = mutable_entries(t) + upb_table_size(t);
  while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); }
 }

@ -315,9 +320,13 @@ static bool inteql(upb_tabkey k1, upb_tabkey k2) {
  return k1.num == k2.num;
 }

+// Returns the array part as a writable pointer; the "array" member itself is
+// declared const.
+static _upb_value *mutable_array(upb_inttable *t) {
+  const _upb_value *readonly = t->array;
+  return (_upb_value*)readonly;
+}
+
 static _upb_value *inttable_val(upb_inttable *t, uintptr_t key) {
  if (key < t->array_size) {
-    return upb_arrhas(t->array[key]) ? (_upb_value*)&t->array[key] : NULL;
+    return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
  } else {
    upb_tabent *e =
        (upb_tabent*)findentry(&t->t, upb_intkey(key), &upb_inthash, &inteql);
@ -361,7 +370,7 @@ bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t ctype,
    uninit(&t->t);
    return false;
  }
-  memset((void*)t->array, 0xff, array_bytes);
+  memset(mutable_array(t), 0xff, array_bytes);
  check(t);
  return true;
 }
@ -372,7 +381,7 @@ bool upb_inttable_init(upb_inttable *t, upb_ctype_t ctype) {

 // Frees the storage owned by the table: the hash part's entries and the array
 // part.  The upb_inttable struct itself is not freed.
 void upb_inttable_uninit(upb_inttable *t) {
   uninit(&t->t);
-  free((void*)t->array);
+  free(mutable_array(t));
 }

 bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
@ -380,7 +389,7 @@ bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
  if (key < t->array_size) {
    assert(!upb_arrhas(t->array[key]));
    t->array_count++;
-    ((_upb_value*)t->array)[key] = val.val;
+    mutable_array(t)[key] = val.val;
  } else {
    if (isfull(&t->t)) {
      // Need to resize the hash part, but we re-use the array part.
@ -428,7 +437,8 @@ bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
      if (val) {
        _upb_value_setval(val, t->array[key], t->t.ctype);
      }
-      ((upb_value*)t->array)[key] = upb_value_uint64(-1);
+      _upb_value empty = UPB_ARRAY_EMPTYENT;
+      mutable_array(t)[key] = empty;
      success = true;
    } else {
      success = false;
--- a/upb/table.int.h
+++ b/upb/table.int.h
@ -175,7 +175,13 @@ typedef struct {
  size_t mask;           // Mask to turn hash value -> bucket.
  upb_ctype_t ctype;     // Type of all values.
  uint8_t size_lg2;      // Size of the hash table part is 2^size_lg2 entries.
-  const upb_tabent *entries;   // Hash table.
+
+  // Hash table entries.
+  // Making this const isn't entirely accurate; what we really want is for it to
+  // have the same const-ness as the table it's inside.  But there's no way to
+  // declare that in C.  So we have to make it const so that we can statically
+  // initialize const hash tables.  Then we cast away const when we have to.
+  const upb_tabent *entries;
 } upb_table;

 typedef struct {
@ -186,10 +192,10 @@ typedef struct {
  {{count, mask, ctype, size_lg2, entries}}

 // Integer-keyed table.  Keys below array_size are stored directly in "array";
 // all other keys are stored in the hash table "t".
 typedef struct {
-  upb_table t;             // For entries that don't fit in the array part.
-  const _upb_value *array;  // Array part of the table.
-  size_t array_size;       // Array part size.
-  size_t array_count;      // Array part number of elements.
+  upb_table t;              // For entries that don't fit in the array part.
+  const _upb_value *array;  // Array part of the table.  See const note above.
+  size_t array_size;        // Array part size.
+  size_t array_count;       // Array part number of elements.
 } upb_inttable;

 #define UPB_INTTABLE_INIT(count, mask, ctype, size_lg2, ent, a, asize, acount) \
@ -198,7 +204,8 @@ typedef struct {
 #define UPB_EMPTY_INTTABLE_INIT(ctype) \
  UPB_INTTABLE_INIT(0, 0, ctype, 0, NULL, NULL, 0, 0)

-#define UPB_ARRAY_EMPTYENT UPB_VALUE_INIT_INT64(-1)
+// Sentinel stored in array slots that hold no value.  Parenthesized so the
+// negative literal stays a single operand wherever the macro is expanded.
+#define UPB_ARRAY_EMPTYVAL (-1)
+#define UPB_ARRAY_EMPTYENT UPB_VALUE_INIT_INT64(UPB_ARRAY_EMPTYVAL)

 UPB_INLINE size_t upb_table_size(const upb_table *t) {
  if (t->size_lg2 == 0)
@ -221,7 +228,7 @@ UPB_INLINE const upb_tabent *upb_inthash(const upb_table *t, upb_tabkey key) {
 }

 // Returns true when array slot value "v" is occupied, i.e. when its uint64
 // member differs from the all-ones empty marker.
 UPB_INLINE bool upb_arrhas(_upb_value v) {
-  return v.uint64 != (uint64_t)-1;
+  return v.uint64 != (uint64_t)UPB_ARRAY_EMPTYVAL;
 }

 uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed);
@ -249,9 +256,8 @@ UPB_INLINE size_t upb_strtable_count(const upb_strtable *t) {
 bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val);
 bool upb_strtable_insert(upb_strtable *t, const char *key, upb_value val);

-// Looks up key in this table, returning a pointer to the table's internal copy
-// of the user's inserted data, or NULL if this key is not in the table.  The
-// returned pointer is invalidated by inserts.
+// Looks up key in this table, returning "true" if the key was found.
+// If v is non-NULL, copies the value for this key into *v.
 bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v);
 bool upb_strtable_lookup(const upb_strtable *t, const char *key, upb_value *v);