Sync from Google-internal development.

pull/13171/head
Josh Haberman 10 years ago
parent 47b5e0968a
commit d493500abc
  1. 13
      tests/pb/test_decoder.cc
  2. 23
      tests/test_table.cc
  3. 20
      upb/bindings/lua/upb/descriptor.c
  4. 21
      upb/pb/compile_decoder.c
  5. 306
      upb/pb/compile_decoder_x64.c
  6. 82
      upb/pb/compile_decoder_x64.dasc
  7. 3
      upb/pb/decoder.c
  8. 16
      upb/pb/decoder.h
  9. 29
      upb/pb/decoder.int.h
  10. 36
      upb/pb/make-gdb-script.rb
  11. 26
      upb/table.c
  12. 26
      upb/table.int.h

@ -612,6 +612,9 @@ void run_decoder(const string& proto, const string* expected_output) {
if (ok) {
fprintf(stderr, "Didn't expect ok result, but got output: '%s'\n",
output.c_str());
} else if (filter_hash) {
fprintf(stderr, "Failed as we expected, with message: %s\n",
status.error_message());
}
ASSERT(!ok);
}
@ -838,6 +841,10 @@ void test_invalid() {
// Field number is 0.
assert_does_not_parse(
cat( tag(0, UPB_WIRE_TYPE_DELIMITED), varint(0) ));
// The previous test alone did not catch this particular pattern which could
// corrupt the internal state.
assert_does_not_parse(
cat( tag(0, UPB_WIRE_TYPE_64BIT), uint64(0) ));
// Field number is too large.
assert_does_not_parse(
@ -928,6 +935,12 @@ void test_valid() {
submsg(12345, string(" ")),
"<\n>\n");
// This triggered a previous bug in the decoder.
assert_successful_parse(
cat( tag(UPB_DESCRIPTOR_TYPE_SFIXED32, UPB_WIRE_TYPE_VARINT),
varint(0) ),
"<\n>\n");
assert_successful_parse(
cat(
submsg(UPB_DESCRIPTOR_TYPE_MESSAGE,

@ -284,6 +284,26 @@ int32_t *get_contiguous_keys(int32_t num) {
return buf;
}
void test_delete() {
upb_inttable t;
upb_inttable_init(&t, UPB_CTYPE_BOOL);
upb_inttable_insert(&t, 0, upb_value_bool(true));
upb_inttable_insert(&t, 2, upb_value_bool(true));
upb_inttable_insert(&t, 4, upb_value_bool(true));
upb_inttable_compact(&t);
upb_inttable_remove(&t, 0, NULL);
upb_inttable_remove(&t, 2, NULL);
upb_inttable_remove(&t, 4, NULL);
upb_inttable_iter iter;
for (upb_inttable_begin(&iter, &t); !upb_inttable_done(&iter);
upb_inttable_next(&iter)) {
ASSERT(false);
}
upb_inttable_uninit(&t);
}
extern "C" {
int run_tests(int argc, char *argv[]) {
@ -336,6 +356,9 @@ int run_tests(int argc, char *argv[]) {
}
test_inttable(keys4, 64, "Table size: 64, keys: 1-32 and 10133-10164 ====\n");
delete[] keys4;
test_delete();
return 0;
}

@ -1,20 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
* A Lua extension for upb/descriptor.
*/
#include "upb/bindings/lua/upb.h"
static const struct luaL_Reg toplevel_m[] = {
{NULL, NULL}
};
int luaopen_upb_descriptor(lua_State *L) {
lupb_newlib(L, "upb.descriptor", toplevel_m);
return 1; // Return package table.
}

@ -509,9 +509,20 @@ static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
return selector;
}
// Rebuilds a primary dispatch table entry so that it carries "new_wt2" as its
// alternate (second) wire type, keeping the offset and the first wire type
// unchanged.  Used when a secondary dispatch table entry is being inserted for
// an alternate wire type.
//
// The entry passed in must not have an alternate wire type yet (asserted).
static uint64_t repack(uint64_t dispatch, int new_wt2) {
  uint64_t offset;
  uint8_t primary_wt;
  uint8_t prev_secondary_wt;

  upb_pbdecoder_unpackdispatch(dispatch, &offset, &primary_wt,
                               &prev_secondary_wt);
  // The alternate wire type slot must still be vacant.
  assert(prev_secondary_wt == NO_WIRE_TYPE);
  return upb_pbdecoder_packdispatch(offset, primary_wt, new_wt2);
}
// Marks the current bytecode position as the dispatch target for this message,
// field, and wire type.
//
static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
const upb_fielddef *f, int wire_type) {
// Offset is relative to msg base.
@ -521,12 +532,12 @@ static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
upb_value v;
if (upb_inttable_remove(d, fn, &v)) {
// TODO: prioritize based on packed setting in .proto file.
uint64_t oldval = upb_value_getuint64(v);
assert(((oldval >> 8) & 0xff) == 0); // wt2 should not be set yet.
upb_inttable_insert(d, fn, upb_value_uint64(oldval | (wire_type << 8)));
uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
upb_inttable_insert(d, fn, upb_value_uint64(repacked));
upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
} else {
upb_inttable_insert(d, fn, upb_value_uint64((ofs << 16) | wire_type));
uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
upb_inttable_insert(d, fn, upb_value_uint64(val));
}
}

@ -10,11 +10,48 @@
#include <dlfcn.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>
#include "upb/pb/decoder.h"
#include "upb/pb/decoder.int.h"
#include "upb/pb/varint.int.h"
#include "upb/shim/shim.h"
// To debug the JIT:
//
// 1. Uncomment:
// #define UPB_JIT_LOAD_SO
//
// Note: this mode requires that we can shell out to gcc.
//
// 2. Run the test once locally. This will load the JIT code by building a
// .so (/tmp/upb-jit-code.so) and using dlopen, so more of the tooling will
// work properly (like GDB).
//
// IF YOU ALSO WANT AUTOMATIC JIT DEBUG OUTPUT:
//
// 3. Run: upb/pb/make-gdb-script.rb > script.gdb. This reads
// /tmp/upb-jit-code.so as input and generates a GDB script that is specific
// to this jit code.
//
// 4. Run: gdb --command=script.gdb --args path/to/test
// This will drop you to a GDB prompt which you can now use normally.
// But when you run the test it will print a message to stdout every time
// the JIT executes assembly for a particular bytecode. Sample output:
//
// X.enterjit bytes=18
// buf_ofs=1 data_rem=17 delim_rem=-2 X.0x6.OP_PARSE_DOUBLE
// buf_ofs=9 data_rem=9 delim_rem=-10 X.0x7.OP_CHECKDELIM
// buf_ofs=9 data_rem=9 delim_rem=-10 X.0x8.OP_TAG1
// X.0x3.dispatch.DecoderTest
// X.parse_unknown
// X.0x3.dispatch.DecoderTest
// X.decode_unknown_tag_fallback
// X.exitjit
//
// This output should roughly correspond to the output that the bytecode
// interpreter emits when compiled with UPB_DUMP_BYTECODE (modulo some
// extra JIT-specific output).
// These defines are necessary for DynASM codegen.
// See dynasm/dasm_proto.h for more info.
#define Dst_DECL jitcompiler *jc
@ -31,8 +68,6 @@
#define MAP_ANONYMOUS MAP_ANON
#endif
#define DECODE_EOF -3
typedef struct {
mgroup *group;
uint32_t *pc;
@ -40,18 +75,39 @@ typedef struct {
// This pointer is allocated by dasm_init() and freed by dasm_free().
struct dasm_State *dynasm;
// Maps arbitrary void* -> pclabel.
upb_inttable pclabels;
upb_inttable pcdefined;
// Maps some key (an arbitrary void*) to a pclabel.
//
// The pclabel represents a location in the generated code -- DynASM exposes
// a pclabel -> (machine code offset) lookup function.
//
// The key can be anything. There are two main kinds of keys:
// - bytecode location -- the void* points to the bytecode instruction
// itself. We can then use this to generate jumps to this instruction.
// - other object (like dispatch table). We use these to represent parts
// of the generated code that do not exactly correspond to a bytecode
// instruction.
upb_inttable jmptargets;
// For marking labels that should go into the generated code.
// Maps pclabel -> char* label (string is owned by the table).
upb_inttable asmlabels;
#ifndef NDEBUG
// Like jmptargets, but members are present in the table when they have had
// define_jmptarget() (as opposed to jmptarget) called. Used to verify that
// define_jmptarget() is called exactly once for every target.
// The value is ignored.
upb_inttable jmpdefined;
// For checking that two asmlabels aren't defined for the same byte.
int lastlabelofs;
#endif
#ifdef UPB_JIT_LOAD_SO
// For marking labels that should go into the generated code.
// Maps pclabel -> char* label (string is owned by the table).
upb_inttable asmlabels;
#endif
// The total number of pclabels currently defined.
// Note that this contains both jmptargets and asmlabels, which both use
// pclabels but for different purposes.
uint32_t pclabel_count;
// Used by DynASM to store globals.
@ -59,10 +115,16 @@ typedef struct {
} jitcompiler;
// Functions called by codegen.
static int pclabel(jitcompiler *jc, const void *here);
static int define_pclabel(jitcompiler *jc, const void *here);
static int jmptarget(jitcompiler *jc, const void *key);
static int define_jmptarget(jitcompiler *jc, const void *key);
static void asmlabel(jitcompiler *jc, const char *fmt, ...);
static int pcofs(jitcompiler* jc);
static int alloc_pclabel(jitcompiler *jc);
#ifdef UPB_JIT_LOAD_SO
static char *upb_vasprintf(const char *fmt, va_list ap);
static char *upb_asprintf(const char *fmt, ...);
#endif
#include "dynasm/dasm_proto.h"
#include "dynasm/dasm_x86.h"
@ -72,10 +134,14 @@ static jitcompiler *newjitcompiler(mgroup *group) {
jitcompiler *jc = malloc(sizeof(jitcompiler));
jc->group = group;
jc->pclabel_count = 0;
upb_inttable_init(&jc->jmptargets, UPB_CTYPE_UINT32);
#ifndef NDEBUG
jc->lastlabelofs = -1;
upb_inttable_init(&jc->pclabels, UPB_CTYPE_UINT32);
upb_inttable_init(&jc->pcdefined, UPB_CTYPE_BOOL);
upb_inttable_init(&jc->jmpdefined, UPB_CTYPE_BOOL);
#endif
#ifdef UPB_JIT_LOAD_SO
upb_inttable_init(&jc->asmlabels, UPB_CTYPE_PTR);
#endif
jc->globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*jc->globals));
dasm_init(jc, 1);
@ -86,37 +152,106 @@ static jitcompiler *newjitcompiler(mgroup *group) {
}
static void freejitcompiler(jitcompiler *jc) {
#ifdef UPB_JIT_LOAD_SO
upb_inttable_iter i;
upb_inttable_begin(&i, &jc->asmlabels);
for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
free(upb_value_getptr(upb_inttable_iter_value(&i)));
}
upb_inttable_uninit(&jc->asmlabels);
upb_inttable_uninit(&jc->pclabels);
upb_inttable_uninit(&jc->pcdefined);
#endif
#ifndef NDEBUG
upb_inttable_uninit(&jc->jmpdefined);
#endif
upb_inttable_uninit(&jc->jmptargets);
dasm_free(jc);
free(jc->globals);
free(jc);
}
// Returns a pclabel associated with the given arbitrary pointer.
static int pclabel(jitcompiler *jc, const void *here) {
#ifdef UPB_JIT_LOAD_SO
// Like vsprintf(), except that the output buffer is allocated with malloc().
// The returned string is NUL-terminated and owned by the caller, who must
// free() it.
//
// Similar to the GNU extension vasprintf(), except that we abort() on any
// error (formatting failure or out-of-memory) instead of reporting it, since
// this is only used for debugging.  This function therefore never returns
// NULL.
//
// "args" is consumed by this call; the caller is still responsible for calling
// va_end() on it.
static char *upb_vasprintf(const char *fmt, va_list args) {
  // Run once to get the length of the string.  This needs its own copy of the
  // argument list because "args" is used again for the real formatting pass.
  va_list args_copy;
  va_copy(args_copy, args);
  int len = vsnprintf(NULL, 0, fmt, args_copy);
  va_end(args_copy);

  // vsnprintf() returns a negative value on error.  Without this check we
  // would allocate a zero-byte buffer and hand back an unterminated string.
  if (len < 0) abort();

  // Compute the size in size_t so that "+ 1" cannot overflow a signed int.
  size_t size = (size_t)len + 1;  // + 1 for NUL terminator.
  char *ret = malloc(size);
  if (!ret) abort();

  // Both passes must agree on the length; anything else means the string in
  // "ret" cannot be trusted.
  int written = vsnprintf(ret, size, fmt, args);
  if (written != len) abort();
  return ret;
}

// Like sprintf(), except that the output string is allocated with malloc() and
// returned; the caller owns it and must free() it.  Aborts on error (see
// upb_vasprintf()), so the result is never NULL.
static char *upb_asprintf(const char *fmt, ...) {
  va_list args;
  va_start(args, fmt);
  char *ret = upb_vasprintf(fmt, args);
  va_end(args);
  return ret;
}
#endif
// Reserves the next unused pclabel number, grows DynASM's pclabel storage to
// cover it, and returns the newly reserved number.
static int alloc_pclabel(jitcompiler *jc) {
  int label = jc->pclabel_count;
  jc->pclabel_count++;
  // DynASM must be told the new total before the label can be used.
  dasm_growpc(jc, jc->pclabel_count);
  return label;
}
static bool try_getjmptarget(jitcompiler *jc, const void *key, int *pclabel) {
upb_value v;
bool found = upb_inttable_lookupptr(&jc->pclabels, here, &v);
if (!found) {
upb_value_setuint32(&v, jc->pclabel_count++);
dasm_growpc(jc, jc->pclabel_count);
upb_inttable_insertptr(&jc->pclabels, here, v);
if (upb_inttable_lookupptr(&jc->jmptargets, key, &v)) {
*pclabel = upb_value_getuint32(v);
return true;
} else {
return false;
}
}
// Returns the pclabel that was allocated for the jmptarget identified by
// "key".
//
// The key must already have been registered through define_jmptarget(); in
// debug builds this is asserted against the jmpdefined table, and the lookup
// itself is asserted to succeed.
static int getjmptarget(jitcompiler *jc, const void *key) {
  assert(upb_inttable_lookupptr(&jc->jmpdefined, key, NULL));

  int label;
  bool found = try_getjmptarget(jc, key, &label);
  UPB_ASSERT_VAR(found, found);
  return label;
}
// Returns a pclabel that serves as a jmp target for the given bytecode pointer.
// This should only be called for code that is jumping to the target; code
// defining the target should use define_jmptarget().
//
// Creates/allocates a pclabel for this target if one does not exist already.
static int jmptarget(jitcompiler *jc, const void *key) {
int pclabel;
if (!try_getjmptarget(jc, key, &pclabel)) {
pclabel = alloc_pclabel(jc);
upb_inttable_insertptr(&jc->jmptargets, key, upb_value_uint32(pclabel));
}
return upb_value_getuint32(v);
return pclabel;
}
// Defines a pclabel associated with the given arbitrary pointer.
// May only be called once (to avoid redefining the pclabel).
static int define_pclabel(jitcompiler *jc, const void *here) {
// Will assert-fail if it already exists.
upb_inttable_insertptr(&jc->pcdefined, here, upb_value_bool(true));
return pclabel(jc, here);
// Defines a pclabel associated with the given bytecode location and returns
// it (allocating the pclabel via jmptarget() if no jump has referenced this
// key yet).
//
// Must be called exactly once per key, by the code that is generating the
// machine code for this bytecode, before bytecode generation is complete.
//
// As a sanity check, debug builds record every key passed here in the
// jmpdefined table; getjmptarget() asserts against that table.
// NOTE(review): the duplicate-definition check presumably relies on the table
// insert asserting when the key already exists -- confirm in upb/table.c.
static int define_jmptarget(jitcompiler *jc, const void *key) {
#ifndef NDEBUG
// Debug-only bookkeeping; the stored value is ignored.
upb_inttable_insertptr(&jc->jmpdefined, key, upb_value_bool(true));
#endif
return jmptarget(jc, key);
}
// Returns a bytecode pc offset relative to the beginning of the group's code.
@ -126,27 +261,35 @@ static int pcofs(jitcompiler *jc) {
static void upb_reg_jit_gdb(jitcompiler *jc);
static int getpclabel(jitcompiler *jc, const void *target) {
return dasm_getpclabel(jc, pclabel(jc, target));
// Translates a jmptarget key into the machine code offset of the code that
// was emitted for it.  The key must have been defined with
// define_jmptarget().
static int machine_code_ofs(jitcompiler *jc, const void *key) {
  // dasm_getpclabel() is misleadingly named: its input is a pclabel and its
  // result is the machine code offset that the pclabel resolved to.
  return dasm_getpclabel(jc, getjmptarget(jc, key));
}
// Given a pcofs relative to method, returns the machine code offset for it
// (relative to the beginning of the machine code).
int nativeofs(jitcompiler *jc, const upb_pbdecodermethod *method, int pcofs) {
void *target = jc->group->bytecode + method->code_base.ofs + pcofs;
return getpclabel(jc, target);
// Translates a bytecode offset into a machine code offset.
//
// The input "pcofs" is relative to the start of "method"'s bytecode, whereas
// the result is relative to the start of *all* of the generated machine code,
// not just this method's.
static int machine_code_ofs2(jitcompiler *jc, const upb_pbdecodermethod *method,
                             int pcofs) {
  // The address of the bytecode instruction doubles as the jmptarget key.
  const void *key = jc->group->bytecode + method->code_base.ofs + pcofs;
  return machine_code_ofs(jc, key);
}
// Given a pcofs relative to this method's base, returns a machine code offset
// relative to pclabel(dispatch->array) (which is used in jitdispatch as the
// relative to jmptarget(dispatch->array) (which is used in jitdispatch as the
// machine code base for dispatch table lookups).
uint32_t dispatchofs(jitcompiler *jc, const upb_pbdecodermethod *method,
int pcofs) {
int ofs1 = getpclabel(jc, method->dispatch.array);
int ofs2 = nativeofs(jc, method, pcofs);
assert(ofs1 > 0);
assert(ofs2 > 0);
int ret = ofs2 - ofs1;
int mc_base = machine_code_ofs(jc, method->dispatch.array);
int mc_target = machine_code_ofs2(jc, method, pcofs);
assert(mc_base > 0);
assert(mc_target > 0);
int ret = mc_target - mc_base;
assert(ret > 0);
return ret;
}
@ -160,31 +303,42 @@ static void patchdispatch(jitcompiler *jc) {
method->is_native_ = true;
upb_inttable *dispatch = &method->dispatch;
// Remove DISPATCH_ENDMSG -- only the bytecode interpreter needs it.
// And leaving it around will cause us to find field 0 improperly.
upb_inttable_remove(dispatch, DISPATCH_ENDMSG, NULL);
upb_inttable_iter i2;
upb_inttable_begin(&i2, dispatch);
for (; !upb_inttable_done(&i2); upb_inttable_next(&i2)) {
uintptr_t key = upb_inttable_iter_key(&i2);
if (key == 0) continue;
uint64_t val = upb_value_getuint64(upb_inttable_iter_value(&i2));
uint64_t newval;
if (key <= UPB_MAX_FIELDNUMBER) {
// Primary slot.
uint64_t oldofs = val >> 16;
uint64_t newofs = dispatchofs(jc, method, oldofs);
newval = (val & 0xffff) | (newofs << 16);
uint64_t ofs;
uint8_t wt1;
uint8_t wt2;
upb_pbdecoder_unpackdispatch(val, &ofs, &wt1, &wt2);
// Update offset and repack.
ofs = dispatchofs(jc, method, ofs);
newval = upb_pbdecoder_packdispatch(ofs, wt1, wt2);
assert((int64_t)newval > 0);
} else {
// Secondary slot. Since we have 64 bits for the value, we use an
// absolute offset.
newval = (uint64_t)(jc->group->jit_code + nativeofs(jc, method, val));
int mcofs = machine_code_ofs2(jc, method, val);
newval = (uint64_t)(jc->group->jit_code + mcofs);
}
bool ok = upb_inttable_replace(dispatch, key, upb_value_uint64(newval));
UPB_ASSERT_VAR(ok, ok);
}
// Set this only *after* we have patched the offsets (nativeofs() above
// reads this).
method->code_base.ptr = jc->group->jit_code + getpclabel(jc, method);
// Update entry point for this method to point at mc base instead of bc
// base. Set this only *after* we have patched the offsets
// (machine_code_ofs2() uses this).
method->code_base.ptr = jc->group->jit_code + machine_code_ofs(jc, method);
upb_byteshandler *h = &method->input_handler_;
upb_byteshandler_setstartstr(h, upb_pbdecoder_startjit, NULL);
@ -193,10 +347,8 @@ static void patchdispatch(jitcompiler *jc) {
}
}
// Define for JIT debugging.
//#define UPB_JIT_LOAD_SO
#ifdef UPB_JIT_LOAD_SO
static void load_so(jitcompiler *jc) {
// Dump to a .so file in /tmp and load that, so all the tooling works right
// (for example, debuggers and profilers will see symbol names for the JIT-ted
@ -204,10 +356,15 @@ static void load_so(jitcompiler *jc) {
// interface is only used/understood by GDB. Hopefully a standard will
// develop for registering JIT-ted code that all tools will recognize,
// rendering this obsolete.
//
// Requires that gcc is available from the command-line.
// Convert all asm labels from pclabel offsets to machine code offsets.
// jc->asmlabels maps:
// pclabel -> char* label
//
// Use this to build mclabels, which maps:
// machine code offset -> char* label
//
// Then we can use mclabels to emit the labels as we iterate over the bytes we
// are outputting.
upb_inttable_iter i;
upb_inttable mclabels;
upb_inttable_init(&mclabels, UPB_CTYPE_PTR);
@ -218,8 +375,16 @@ static void load_so(jitcompiler *jc) {
upb_inttable_iter_value(&i));
}
FILE *f = fopen("/tmp/upb-jit-code.s", "w");
if (f) {
// We write a .s file in text format, as input to the assembler.
// Then we run gcc to turn it into a .so file.
//
// The last "XXXXXX" will be replaced with something randomly generated by
// mkstemp(). We don't add ".s" to this filename because it makes the string
// processing for mkstemp() and system() more complicated.
char s_filename[] = "/tmp/upb-jit-codeXXXXXX";
int fd = mkstemp(s_filename);
FILE *f;
if (fd >= 0 && (f = fdopen(fd, "wb")) != NULL) {
uint8_t *jit_code = (uint8_t*)jc->group->jit_code;
fputs(" .text\n\n", f);
size_t linelen = 0;
@ -242,17 +407,33 @@ static void load_so(jitcompiler *jc) {
fputs("\n", f);
fclose(f);
} else {
fprintf(stderr, "Couldn't open /tmp/upb-jit-code.s for writing\n");
fprintf(stderr, "Error opening tmp file for JIT debug output.\n");
abort();
}
// TODO: racy
if (system("gcc -shared -o /tmp/upb-jit-code.so /tmp/upb-jit-code.s") != 0) {
fprintf(stderr, "Error compiling upb-jit-code.s\n");
// This is exploitable if you have an adversary on your machine who can write
// to this tmp directory. But this is just for debugging, so we don't worry
// too much about that. It shouldn't be prone to races against concurrent
// (non-adversarial) upb JITs because we used mkstemp().
char *cmd = upb_asprintf("gcc -shared -o %s.so -x assembler %s", s_filename,
s_filename);
if (system(cmd) != 0) {
fprintf(stderr, "Error compiling %s\n", s_filename);
abort();
}
free(cmd);
char *so_filename = upb_asprintf("%s.so", s_filename);
jc->group->dl = dlopen("/tmp/upb-jit-code.so", RTLD_LAZY);
// Some convenience symlinks.
// This is racy, but just for convenience.
unlink("/tmp/upb-jit-code.so");
unlink("/tmp/upb-jit-code.s");
symlink(s_filename, "/tmp/upb-jit-code.s");
symlink(so_filename, "/tmp/upb-jit-code.so");
jc->group->dl = dlopen(so_filename, RTLD_LAZY);
free(so_filename);
if (!jc->group->dl) {
fprintf(stderr, "Couldn't dlopen(): %s\n", dlerror());
abort();
@ -267,6 +448,7 @@ static void load_so(jitcompiler *jc) {
upb_inttable_uninit(&mclabels);
}
#endif
void upb_pbdecoder_jit(mgroup *group) {

@ -133,33 +133,44 @@
| add PTR, 1
|.endmacro
#define DECODE_EOF -3
// NULL-tolerant wrapper around upb_handlers_gethandler(): returns the handler
// registered for selector "sel" in "h", or NULL when "h" itself is NULL.
static upb_func *gethandler(const upb_handlers *h, upb_selector_t sel) {
  if (!h) {
    return NULL;
  }
  return upb_handlers_gethandler(h, sel);
}
// Defines an "assembly label" for the current code generation offset.
// This label exists *purely* for debugging purposes: it is emitted into
// the .so, and printed as part of JIT debugging output when UPB_JIT_LOAD_SO is
// defined.
//
// We would define this in the .c file except that it conditionally defines a
// pclabel.
static void asmlabel(jitcompiler *jc, const char *fmt, ...) {
#ifndef NDEBUG
int ofs = jc->dynasm->section->ofs;
assert(ofs != jc->lastlabelofs);
jc->lastlabelofs = ofs;
#endif
#ifndef UPB_JIT_LOAD_SO
UPB_UNUSED(jc);
UPB_UNUSED(fmt);
#else
va_list args;
va_start(args, fmt);
// Run once to get the length of the string.
va_list args_copy;
va_copy(args_copy, args);
int len = vsnprintf(NULL, 0, fmt, args_copy);
va_end(args_copy);
char *str = malloc(len + 1); // + 1 for NULL terminator.
if (!str) exit(1);
int written = vsnprintf(str, len + 1, fmt, args);
char *str = upb_vasprintf(fmt, args);
va_end(args);
UPB_ASSERT_VAR(written, written == len);
uint32_t label = jc->pclabel_count++;
dasm_growpc(jc, jc->pclabel_count);
|=>label:
upb_inttable_insert(&jc->asmlabels, label, upb_value_ptr(str));
}
static upb_func *gethandler(const upb_handlers *h, upb_selector_t sel) {
return h ? upb_handlers_gethandler(h, sel) : NULL;
int pclabel = alloc_pclabel(jc);
// Normally we would prefer to allocate this inline with the codegen,
// ie.
// |=>asmlabel(...)
// But since we do this conditionally, only when UPB_JIT_LOAD_SO is defined,
// we do it here instead.
|=>pclabel:
upb_inttable_insert(&jc->asmlabels, pclabel, upb_value_ptr(str));
#endif
}
// Should only be called when the associated handler is known to exist.
@ -696,7 +707,7 @@ static void jitdispatch(jitcompiler *jc,
// this characteristic.
bool has_multi_wiretype = has_hash_entries;
|=>define_pclabel(jc, &method->dispatch):
|=>define_jmptarget(jc, &method->dispatch):
|1:
// Decode the field tag.
| mov aword DECODER->checkpoint, PTR
@ -751,8 +762,17 @@ static void jitdispatch(jitcompiler *jc,
| jne >5
}
| shr rax, 16
|
| // Load the machine code address from the table entry.
| // The table entry is relative to the dispatch->array jmptarget
| // (patchdispatch() took care of this) which is the same as
| // local label "4". The "lea" is really just trying to do
| // lea rax, [>4 + rax]
| //
| // But we can't write that directly for some reason, so we use
| // rdx as a temporary.
| lea rdx, [>4]
|=>define_pclabel(jc, dispatch->array):
|=>define_jmptarget(jc, dispatch->array):
|4:
| add rax, rdx
| ret
@ -812,7 +832,7 @@ static void jittag(jitcompiler *jc, uint64_t tag, int n, int ofs,
| cmp eax, DECODE_MISMATCH
| je >3
| cmp eax, DECODE_EOF
| je =>pclabel(jc, delimend)
| je =>jmptarget(jc, delimend)
| jmp >5
|1:
@ -844,12 +864,12 @@ static void jittag(jitcompiler *jc, uint64_t tag, int n, int ofs,
| je >4
|3:
if (ofs == 0) {
| call =>pclabel(jc, &method->dispatch)
| call =>jmptarget(jc, &method->dispatch)
| test rax, rax
| jz =>pclabel(jc, delimend)
| jz =>jmptarget(jc, delimend)
| jmp rax
} else {
| jmp =>pclabel(jc, jc->pc + ofs)
| jmp =>jmptarget(jc, jc->pc + ofs)
}
|4:
| add PTR, n
@ -874,7 +894,7 @@ static void jitbytecode(jitcompiler *jc) {
// Skipped for SETDISPATCH because it should point at the function
// prologue, not the dispatch function that is emitted first.
// TODO: optimize this to only define pclabels that are actually used.
|=>define_pclabel(jc, jc->pc):
|=>define_jmptarget(jc, jc->pc):
}
jc->pc++;
@ -936,8 +956,8 @@ static void jitbytecode(jitcompiler *jc) {
// Emit function prologue for new method.
asmlabel(jc, "0x%lx.parse.%s", pcofs(jc), msgname);
|=>define_pclabel(jc, op_pc):
|=>define_pclabel(jc, method):
|=>define_jmptarget(jc, op_pc):
|=>define_jmptarget(jc, method):
| sub rsp, 8
break;
@ -1082,13 +1102,13 @@ static void jitbytecode(jitcompiler *jc) {
break;
case OP_CHECKDELIM:
| cmp DELIMEND, PTR
| je =>pclabel(jc, jc->pc + longofs)
| je =>jmptarget(jc, jc->pc + longofs)
break;
case OP_CALL:
| call =>pclabel(jc, jc->pc + longofs)
| call =>jmptarget(jc, jc->pc + longofs)
break;
case OP_BRANCH:
| jmp =>pclabel(jc, jc->pc + longofs);
| jmp =>jmptarget(jc, jc->pc + longofs);
break;
case OP_RET:
|9:

@ -567,7 +567,8 @@ static int32_t dispatch(upb_pbdecoder *d) {
// Lookup tag. Because of packed/non-packed compatibility, we have to
// check the wire type against two possibilities.
upb_value val;
if (upb_inttable_lookup32(dispatch, fieldnum, &val)) {
if (fieldnum != DISPATCH_ENDMSG &&
upb_inttable_lookup32(dispatch, fieldnum, &val)) {
uint64_t v = upb_value_getuint64(val);
if (wire_type == (v & 0xff)) {
d->pc = d->top->base + (v >> 16);

@ -146,19 +146,9 @@ UPB_DEFINE_STRUCT(upb_pbdecodermethod, upb_refcounted,
// The destination handlers this method is bound to. We own a ref.
const upb_handlers *dest_handlers_;
// The dispatch table layout is:
// [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ]
//
// If wt1 matches, jump to the 48-bit offset. If wt2 matches, lookup
// (UPB_MAX_FIELDNUMBER + fieldnum) and jump there.
//
// We need two wire types because of packed/non-packed compatibility. A
// primitive repeated field can use either wire type and be valid. While we
// could key the table on fieldnum+wiretype, the table would be 8x sparser.
//
// Storing two wire types in the primary value allows us to quickly rule out
// the second wire type without needing to do a separate lookup (this case is
// less common than an unknown field).
// Dispatch table -- used by both bytecode decoder and JIT when encountering a
// field number that wasn't the one we were expecting to see. See
// decoder.int.h for the layout of this table.
upb_inttable dispatch;
));

@ -147,6 +147,35 @@ void upb_pbdecoder_freejit(mgroup *group);
// RET) for branching to when we find an appropriate ENDGROUP tag.
#define DISPATCH_ENDMSG 0
// It's important to use this invalid wire type instead of 0 (which is a valid
// wire type).
#define NO_WIRE_TYPE 0xff
// The dispatch table layout is:
// [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ]
//
// If wt1 matches, jump to the 48-bit offset. If wt2 matches, lookup
// (UPB_MAX_FIELDNUMBER + fieldnum) and jump there.
//
// We need two wire types because of packed/non-packed compatibility. A
// primitive repeated field can use either wire type and be valid. While we
// could key the table on fieldnum+wiretype, the table would be 8x sparser.
//
// Storing two wire types in the primary value allows us to quickly rule out
// the second wire type without needing to do a separate lookup (this case is
// less common than an unknown field).
// Builds a dispatch table value from its three components:
//   bits 0-7:   wt1 (primary wire type)
//   bits 8-15:  wt2 (alternate wire type)
//   bits 16-63: ofs (only the low 48 bits of "ofs" survive the shift)
UPB_INLINE uint64_t upb_pbdecoder_packdispatch(uint64_t ofs, uint8_t wt1,
                                               uint8_t wt2) {
  uint64_t packed = ofs << 16;
  packed |= (uint64_t)wt2 << 8;
  packed |= wt1;
  return packed;
}
// Splits a dispatch table value into its components (the inverse of
// upb_pbdecoder_packdispatch()):
//   *wt1 receives bits 0-7, *wt2 receives bits 8-15, *ofs receives bits 16-63.
// All three output pointers must be non-NULL.
UPB_INLINE void upb_pbdecoder_unpackdispatch(uint64_t dispatch, uint64_t *ofs,
                                             uint8_t *wt1, uint8_t *wt2) {
  const uint64_t kByteMask = 0xff;
  *wt1 = (uint8_t)(dispatch & kByteMask);
  *wt2 = (uint8_t)((dispatch >> 8) & kByteMask);
  *ofs = dispatch >> 16;
}
// All of the functions in decoder.c that return int32_t return values according
// to the following scheme:
// 1. negative values indicate a return code from the following list.

@ -0,0 +1,36 @@
#!/usr/bin/ruby
#
# Generates a GDB script (on stdout) that traces execution of JIT-ted code.
#
# Runs "nm -S" on /tmp/upb-jit-code.so and, for every text ("T") symbol whose
# name contains "X.", emits a breakpoint whose command list silently prints a
# line identifying the symbol and then continues execution.

# Script preamble: GDB settings emitted once, before any breakpoints.
puts "set width 0
set height 0
set verbose off\n\n"
IO.popen("nm -S /tmp/upb-jit-code.so").each_line { |line|
# Input lines look like this:
# 000000000000575a T X.0x10.OP_CHECKDELIM
#
# For each one we want to emit a command that looks like:
# b X.0x10.OP_CHECKDELIM
# commands
# silent
# printf "buf_ofs=%d data_rem=%d delim_rem=%d X.0x10.OP_CHECKDELIM\n", $rbx - (long)((upb_pbdecoder*)($r15))->buf, $r12 - $rbx, $rbp - $rbx
# continue
# end
parts = line.split
# Only symbols of type "T" are of interest.
next if parts[1] != "T"
sym = parts[2]
# Skip symbols whose name does not contain "X.".
next if sym !~ /X\./;
# Choose what the breakpoint prints based on the kind of symbol.
if sym =~ /OP_/ then
# Bytecode instruction label: also print values computed from registers.
printcmd = "printf \"buf_ofs=%d data_rem=%d delim_rem=%d #{sym}\\n\", $rbx - (long)((upb_pbdecoder*)($r15))->buf, $r12 - $rbx, $rbp - $rbx"
elsif sym =~ /enterjit/ then
# JIT entry label: print $rcx as the byte count.
printcmd = "printf \"#{sym} bytes=%d\\n\", $rcx"
else
# Any other label: print just the symbol name.
printcmd = "printf \"#{sym}\\n\""
end
# Emit the breakpoint together with its command list.
puts "b #{sym}
commands
silent
#{printcmd}
continue
end\n\n"
}

@ -53,6 +53,11 @@ typedef bool eqlfunc_t(upb_tabkey k1, upb_tabkey k2);
/* Base table (shared code) ***************************************************/
// Gives write access to the hash part of "t".  The entries member is declared
// pointer-to-const, so code that needs to modify the entries goes through this
// helper instead of scattering casts around.
static upb_tabent *mutable_entries(upb_table *t) {
  const upb_tabent *entries = t->entries;
  return (upb_tabent*)entries;
}
// Returns true if the table would exceed its maximum load factor (MAX_LOAD)
// after one more entry is added, i.e. if (count + 1) / size > MAX_LOAD.
static bool isfull(upb_table *t) {
// The cast forces floating-point division rather than integer division.
return (double)(t->count + 1) / upb_table_size(t) > MAX_LOAD;
}
@ -66,17 +71,17 @@ static bool init(upb_table *t, upb_ctype_t ctype, uint8_t size_lg2) {
if (bytes > 0) {
t->entries = malloc(bytes);
if (!t->entries) return false;
memset((void*)t->entries, 0, bytes);
memset(mutable_entries(t), 0, bytes);
} else {
t->entries = NULL;
}
return true;
}
static void uninit(upb_table *t) { free((void*)t->entries); }
static void uninit(upb_table *t) { free(mutable_entries(t)); }
static upb_tabent *emptyent(upb_table *t) {
upb_tabent *e = (upb_tabent*)t->entries + upb_table_size(t);
upb_tabent *e = mutable_entries(t) + upb_table_size(t);
while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); }
}
@ -315,9 +320,13 @@ static bool inteql(upb_tabkey k1, upb_tabkey k2) {
return k1.num == k2.num;
}
// Gives write access to the array part of "t", whose member is declared
// pointer-to-const.
static _upb_value *mutable_array(upb_inttable *t) {
  const _upb_value *array = t->array;
  return (_upb_value*)array;
}
static _upb_value *inttable_val(upb_inttable *t, uintptr_t key) {
if (key < t->array_size) {
return upb_arrhas(t->array[key]) ? (_upb_value*)&t->array[key] : NULL;
return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
} else {
upb_tabent *e =
(upb_tabent*)findentry(&t->t, upb_intkey(key), &upb_inthash, &inteql);
@ -361,7 +370,7 @@ bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t ctype,
uninit(&t->t);
return false;
}
memset((void*)t->array, 0xff, array_bytes);
memset(mutable_array(t), 0xff, array_bytes);
check(t);
return true;
}
@ -372,7 +381,7 @@ bool upb_inttable_init(upb_inttable *t, upb_ctype_t ctype) {
void upb_inttable_uninit(upb_inttable *t) {
uninit(&t->t);
free((void*)t->array);
free(mutable_array(t));
}
bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
@ -380,7 +389,7 @@ bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
if (key < t->array_size) {
assert(!upb_arrhas(t->array[key]));
t->array_count++;
((_upb_value*)t->array)[key] = val.val;
mutable_array(t)[key] = val.val;
} else {
if (isfull(&t->t)) {
// Need to resize the hash part, but we re-use the array part.
@ -428,7 +437,8 @@ bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
if (val) {
_upb_value_setval(val, t->array[key], t->t.ctype);
}
((upb_value*)t->array)[key] = upb_value_uint64(-1);
_upb_value empty = UPB_ARRAY_EMPTYENT;
mutable_array(t)[key] = empty;
success = true;
} else {
success = false;

@ -175,7 +175,13 @@ typedef struct {
size_t mask; // Mask to turn hash value -> bucket.
upb_ctype_t ctype; // Type of all values.
uint8_t size_lg2; // Size of the hash table part is 2^size_lg2 entries.
const upb_tabent *entries; // Hash table.
// Hash table entries.
// Making this const isn't entirely accurate; what we really want is for it to
// have the same const-ness as the table it's inside. But there's no way to
// declare that in C. So we have to make it const so that we can statically
// initialize const hash tables. Then we cast away const when we have to.
const upb_tabent *entries;
} upb_table;
typedef struct {
@ -186,10 +192,10 @@ typedef struct {
{{count, mask, ctype, size_lg2, entries}}
typedef struct {
upb_table t; // For entries that don't fit in the array part.
const _upb_value *array; // Array part of the table.
size_t array_size; // Array part size.
size_t array_count; // Array part number of elements.
upb_table t; // For entries that don't fit in the array part.
const _upb_value *array; // Array part of the table. See const note above.
size_t array_size; // Array part size.
size_t array_count; // Array part number of elements.
} upb_inttable;
#define UPB_INTTABLE_INIT(count, mask, ctype, size_lg2, ent, a, asize, acount) \
@ -198,7 +204,8 @@ typedef struct {
#define UPB_EMPTY_INTTABLE_INIT(ctype) \
UPB_INTTABLE_INIT(0, 0, ctype, 0, NULL, NULL, 0, 0)
#define UPB_ARRAY_EMPTYENT UPB_VALUE_INIT_INT64(-1)
#define UPB_ARRAY_EMPTYVAL -1
#define UPB_ARRAY_EMPTYENT UPB_VALUE_INIT_INT64(UPB_ARRAY_EMPTYVAL)
UPB_INLINE size_t upb_table_size(const upb_table *t) {
if (t->size_lg2 == 0)
@ -221,7 +228,7 @@ UPB_INLINE const upb_tabent *upb_inthash(const upb_table *t, upb_tabkey key) {
}
UPB_INLINE bool upb_arrhas(_upb_value v) {
return v.uint64 != (uint64_t)-1;
return v.uint64 != (uint64_t)UPB_ARRAY_EMPTYVAL;
}
uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed);
@ -249,9 +256,8 @@ UPB_INLINE size_t upb_strtable_count(const upb_strtable *t) {
bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val);
bool upb_strtable_insert(upb_strtable *t, const char *key, upb_value val);
// Looks up key in this table, returning a pointer to the table's internal copy
// of the user's inserted data, or NULL if this key is not in the table. The
// returned pointer is invalidated by inserts.
// Looks up key in this table, returning "true" if the key was found.
// If v is non-NULL, copies the value for this key into *v.
bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v);
bool upb_strtable_lookup(const upb_strtable *t, const char *key, upb_value *v);

Loading…
Cancel
Save