- rewritten decoder; interpreted decoder is bytecode-based, JIT decoder no longer falls back to the interpreter.
- C++ improvements: C++11-compatible iterators, upb::reffed_ptr for RAII refcounting, better upcast/downcast support.
- removed the gross upb_value abstraction from public upb.h.
pull/13171/head
parent
61109fca1f
commit
26d98ca94f
45 changed files with 5450 additions and 3273 deletions
@ -0,0 +1,855 @@ |
||||
/*
|
||||
* upb - a minimalist implementation of protocol buffers. |
||||
* |
||||
* Copyright (c) 2013 Google Inc. See LICENSE for details. |
||||
* Author: Josh Haberman <jhaberman@gmail.com> |
||||
* |
||||
* Code to compile a upb::MessageDef into bytecode for decoding that message. |
||||
* Bytecode definition is in decoder.int.h. |
||||
*/ |
||||
|
||||
#include <stdarg.h> |
||||
#include "upb/pb/decoder.int.h" |
||||
#include "upb/pb/varint.int.h" |
||||
#include "upb/bytestream.h" |
||||
|
||||
#ifdef UPB_DUMP_BYTECODE |
||||
#include <stdio.h> |
||||
#endif |
||||
|
||||
#define MAXLABEL 5 |
||||
#define EMPTYLABEL -1 |
||||
|
||||
/* upb_pbdecodermethod ********************************************************/ |
||||
|
||||
static upb_pbdecodermethod *newmethod(const upb_msgdef *msg, |
||||
const upb_handlers *dest_handlers) { |
||||
upb_pbdecodermethod *ret = malloc(sizeof(upb_pbdecodermethod)); |
||||
ret->msg = msg; |
||||
ret->dest_handlers = dest_handlers; |
||||
ret->native_code = false; // If we JIT, it will update this later.
|
||||
upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64); |
||||
|
||||
if (ret->dest_handlers) { |
||||
upb_handlers_ref(ret->dest_handlers, ret); |
||||
} |
||||
return ret; |
||||
} |
||||
|
||||
// Destroys a method created by newmethod(): drops the reference on the
// destination handlers (if any), tears down the dispatch table, and frees the
// method itself.
static void freemethod(upb_pbdecodermethod *method) {
  const upb_handlers *dest = method->dest_handlers;
  if (dest != NULL) {
    upb_handlers_unref(dest, method);
  }
  upb_inttable_uninit(&method->dispatch);
  free(method);
}
||||
|
||||
|
||||
/* upb_pbdecoderplan **********************************************************/ |
||||
|
||||
upb_pbdecoderplan *newplan() { |
||||
upb_pbdecoderplan *p = malloc(sizeof(*p)); |
||||
upb_inttable_init(&p->methods, UPB_CTYPE_PTR); |
||||
p->code = NULL; |
||||
p->code_end = NULL; |
||||
return p; |
||||
} |
||||
|
||||
void freeplan(void *_p) { |
||||
upb_pbdecoderplan *p = _p; |
||||
|
||||
upb_inttable_iter i; |
||||
upb_inttable_begin(&i, &p->methods); |
||||
for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { |
||||
upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i)); |
||||
freemethod(method); |
||||
} |
||||
upb_inttable_uninit(&p->methods); |
||||
free(p->code); |
||||
#ifdef UPB_USE_JIT_X64 |
||||
upb_pbdecoder_freejit(p); |
||||
#endif |
||||
free(p); |
||||
} |
||||
|
||||
// Installs the bytecode-interpreter entry points on "h" for the
// UPB_BYTESTREAM_BYTES field, each with the plan "p" as its handler data.
// freeplan is passed as the final argument of the string handler only
// (presumably the handler-data cleanup callback -- confirm against the
// upb_handlers API).
void set_bytecode_handlers(upb_pbdecoderplan *p, upb_handlers *h) {
  upb_handlers_setstartstr(
      h, UPB_BYTESTREAM_BYTES, upb_pbdecoder_start, p, NULL);
  upb_handlers_setstring(
      h, UPB_BYTESTREAM_BYTES, upb_pbdecoder_decode, p, freeplan);
  upb_handlers_setendstr(
      h, UPB_BYTESTREAM_BYTES, upb_pbdecoder_end, p, NULL);
}
||||
|
||||
static const upb_pbdecoderplan *getdecoderplan(const upb_handlers *h) { |
||||
if (upb_handlers_frametype(h) != &upb_pbdecoder_frametype) |
||||
return NULL; |
||||
upb_selector_t sel; |
||||
if (!upb_handlers_getselector(UPB_BYTESTREAM_BYTES, UPB_HANDLER_STARTSTR, |
||||
&sel)) { |
||||
return NULL; |
||||
} |
||||
return upb_handlers_gethandlerdata(h, sel); |
||||
} |
||||
|
||||
|
||||
/* compiler *******************************************************************/ |
||||
|
||||
// Data used only at compilation time.
|
||||
typedef struct { |
||||
upb_pbdecoderplan *plan; |
||||
|
||||
uint32_t *pc; |
||||
int fwd_labels[MAXLABEL]; |
||||
int back_labels[MAXLABEL]; |
||||
} compiler; |
||||
|
||||
// Allocates a compiler for "plan" with every forward and backward label
// cleared.  The write cursor (pc) is not set here; compile_methods() sets it.
static compiler *newcompiler(upb_pbdecoderplan *plan) {
  compiler *c = malloc(sizeof(*c));
  c->plan = plan;

  int slot = 0;
  while (slot < MAXLABEL) {
    c->fwd_labels[slot] = EMPTYLABEL;
    c->back_labels[slot] = EMPTYLABEL;
    slot++;
  }
  return c;
}
||||
|
||||
// Frees the compiler only; the plan it points to is not touched.
static void freecompiler(compiler *c) {
  free(c);
}
||||
|
||||
// Number of 32-bit bytecode words occupied by one pointer operand.
const size_t ptr_words = sizeof(void *) / sizeof(uint32_t);
||||
|
||||
// How many words an instruction is.
|
||||
static int instruction_len(uint32_t instr) { |
||||
switch (getop(instr)) { |
||||
case OP_SETDISPATCH: return 1 + ptr_words; |
||||
case OP_TAGN: return 3; |
||||
case OP_SETBIGGROUPNUM: return 2; |
||||
default: return 1; |
||||
} |
||||
} |
||||
|
||||
bool op_has_longofs(int32_t instruction) { |
||||
switch (getop(instruction)) { |
||||
case OP_CALL: |
||||
case OP_BRANCH: |
||||
case OP_CHECKDELIM: |
||||
return true; |
||||
// The "tag" instructions only have 8 bytes available for the jump target,
|
||||
// but that is ok because these opcodes only require short jumps.
|
||||
case OP_TAG1: |
||||
case OP_TAG2: |
||||
case OP_TAGN: |
||||
return false; |
||||
default: |
||||
assert(false); |
||||
return false; |
||||
} |
||||
} |
||||
|
||||
// Extracts the signed jump offset from an instruction word: the upper 24 bits
// for long-offset opcodes, otherwise the sign-extended byte in bits 8..15.
static int32_t getofs(uint32_t instruction) {
  return op_has_longofs(instruction) ? (int32_t)instruction >> 8
                                     : (int8_t)(instruction >> 8);
}
||||
|
||||
// Stores the signed jump offset "ofs" into *instruction, using the wide field
// for long-offset opcodes and the 8-bit field (bits 8..15) otherwise.  For the
// wide form everything above the opcode byte is overwritten; for the narrow
// form the other bits are preserved.
static void setofs(uint32_t *instruction, int32_t ofs) {
  if (op_has_longofs(*instruction)) {
    // Shift as unsigned: backward jumps are negative, and left-shifting a
    // negative signed value is undefined behavior.
    *instruction = getop(*instruction) | ((uint32_t)ofs << 8);
  } else {
    *instruction = (*instruction & ~0xff00) | ((ofs & 0xff) << 8);
  }
  assert(getofs(*instruction) == ofs);  // Would fail in cases of overflow.
}
||||
|
||||
// Current write position, as a word offset from the start of the bytecode.
static uint32_t pcofs(compiler *c) {
  const uint32_t *base = c->plan->code;
  return c->pc - base;
}
||||
|
||||
// Defines a local label at the current PC location. All previous forward
|
||||
// references are updated to point to this location. The location is noted
|
||||
// for any future backward references.
|
||||
static void label(compiler *c, unsigned int label) { |
||||
assert(label < MAXLABEL); |
||||
int val = c->fwd_labels[label]; |
||||
uint32_t *codep = (val == EMPTYLABEL) ? NULL : c->plan->code + val; |
||||
while (codep) { |
||||
int ofs = getofs(*codep); |
||||
setofs(codep, c->pc - codep - instruction_len(*codep)); |
||||
codep = ofs ? codep + ofs : NULL; |
||||
} |
||||
c->fwd_labels[label] = EMPTYLABEL; |
||||
c->back_labels[label] = pcofs(c); |
||||
} |
||||
|
||||
// Creates a reference to a numbered label; either a forward reference
|
||||
// (positive arg) or backward reference (negative arg). For forward references
|
||||
// the value returned now is actually a "next" pointer into a linked list of all
|
||||
// instructions that use this label and will be patched later when the label is
|
||||
// defined with label().
|
||||
//
|
||||
// The returned value is the offset that should be written into the instruction.
|
||||
static int32_t labelref(compiler *c, int label) { |
||||
assert(label < MAXLABEL); |
||||
if (label == LABEL_DISPATCH) { |
||||
// No resolving required.
|
||||
return 0; |
||||
} else if (label < 0) { |
||||
// Backward local label. Relative to the next instruction.
|
||||
uint32_t from = (c->pc + 1) - c->plan->code; |
||||
return c->back_labels[-label] - from; |
||||
} else { |
||||
// Forward local label: prepend to (possibly-empty) linked list.
|
||||
int *lptr = &c->fwd_labels[label]; |
||||
int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c); |
||||
*lptr = pcofs(c); |
||||
return ret; |
||||
} |
||||
} |
||||
|
||||
// Appends one 32-bit word at the write cursor.  When the buffer is full it is
// grown first: capacity doubles, with a minimum of 64 words.
static void put32(compiler *c, uint32_t v) {
  upb_pbdecoderplan *plan = c->plan;

  if (c->pc == plan->code_end) {
    int used = pcofs(c);
    size_t oldcap = plan->code_end - plan->code;
    size_t newcap = UPB_MAX(oldcap * 2, 64);
    // TODO(haberman): handle OOM.
    plan->code = realloc(plan->code, newcap * sizeof(uint32_t));
    plan->code_end = plan->code + newcap;
    // The buffer may have moved; re-derive the cursor from its offset.
    c->pc = plan->code + used;
  }

  *c->pc = v;
  c->pc++;
}
||||
|
||||
// Appends one instruction (opcode word plus any operand words) at the current
// pc.  The variadic arguments depend on "op":
//
//   no argument:        STARTMSG, ENDMSG, PUSHTAGDELIM, PUSHLENDELIM, POP,
//                       SETDELIM, HALT
//   upb_selector_t:     PARSE_*, STARTSEQ, ENDSEQ, STARTSUBMSG, ENDSUBMSG,
//                       STARTSTR, STRING, ENDSTR, SETGROUPNUM
//                       (stored in the bits above the opcode byte)
//   int:                SETBIGGROUPNUM (stored in a second word)
//   void*:              SETDISPATCH (pointer stored in the following word(s))
//   method pointer:     CALL (upb_pbdecodermethod*)
//   int label:          CHECKDELIM, BRANCH
//   int label, uint64_t encoded tag:  TAG1, TAG2, TAGN
//
// Opcodes not listed in the switch emit nothing.
static void putop(compiler *c, opcode op, ...) {
  va_list args;
  va_start(args, op);

  switch (op) {
    // --- Bare opcodes.
    case OP_STARTMSG:
    case OP_ENDMSG:
    case OP_PUSHTAGDELIM:
    case OP_PUSHLENDELIM:
    case OP_POP:
    case OP_SETDELIM:
    case OP_HALT:
      put32(c, op);
      break;

    // --- Opcodes with an inline selector / small integer operand.
    case OP_PARSE_DOUBLE:
    case OP_PARSE_FLOAT:
    case OP_PARSE_INT64:
    case OP_PARSE_UINT64:
    case OP_PARSE_INT32:
    case OP_PARSE_FIXED64:
    case OP_PARSE_FIXED32:
    case OP_PARSE_BOOL:
    case OP_PARSE_UINT32:
    case OP_PARSE_SFIXED32:
    case OP_PARSE_SFIXED64:
    case OP_PARSE_SINT32:
    case OP_PARSE_SINT64:
    case OP_STARTSEQ:
    case OP_SETGROUPNUM:
    case OP_ENDSEQ:
    case OP_STARTSUBMSG:
    case OP_ENDSUBMSG:
    case OP_STARTSTR:
    case OP_STRING:
    case OP_ENDSTR:
      put32(c, op | va_arg(args, upb_selector_t) << 8);
      break;

    // --- Group number too large for the inline field: separate word.
    case OP_SETBIGGROUPNUM:
      put32(c, op);
      put32(c, va_arg(args, int));
      break;

    // --- Dispatch table pointer, split into 32-bit words (low word first).
    case OP_SETDISPATCH: {
      uintptr_t tableptr = (uintptr_t)va_arg(args, void*);
      put32(c, OP_SETDISPATCH);
      put32(c, tableptr);
      if (sizeof(uintptr_t) > sizeof(uint32_t)) {
        put32(c, (uint64_t)tableptr >> 32);
      }
      break;
    }

    // --- Call: offset of the target method relative to the next instruction.
    case OP_CALL: {
      const upb_pbdecodermethod *target =
          va_arg(args, upb_pbdecodermethod *);
      put32(c, op | (target->base.ofs - (pcofs(c) + 1)) << 8);
      break;
    }

    // --- Jumps to a local label.
    case OP_CHECKDELIM:
    case OP_BRANCH: {
      uint32_t word = op;
      int target = va_arg(args, int);
      setofs(&word, labelref(c, target));
      put32(c, word);
      break;
    }

    // --- Tag checks: short tags live in the upper 16 bits of the word.
    case OP_TAG1:
    case OP_TAG2: {
      int target = va_arg(args, int);
      uint64_t tag = va_arg(args, uint64_t);
      uint32_t word = op | (tag << 16);
      assert(tag <= 0xffff);
      setofs(&word, labelref(c, target));
      put32(c, word);
      break;
    }

    // --- Long tag: its size goes in the opcode word, the tag itself follows
    // as two words (low word first).
    case OP_TAGN: {
      int target = va_arg(args, int);
      uint64_t tag = va_arg(args, uint64_t);
      uint32_t word = op | (upb_value_size(tag) << 16);
      setofs(&word, labelref(c, target));
      put32(c, word);
      put32(c, tag);
      put32(c, tag >> 32);
      break;
    }
  }

  va_end(args);
}
||||
|
||||
#if defined(UPB_USE_JIT_X64) || defined(UPB_DUMP_BYTECODE) |
||||
|
||||
const char *upb_pbdecoder_getopname(unsigned int op) { |
||||
#define OP(op) [OP_ ## op] = "OP_" #op |
||||
#define T(op) OP(PARSE_##op) |
||||
static const char *names[] = { |
||||
"<no opcode>", |
||||
T(DOUBLE), T(FLOAT), T(INT64), T(UINT64), T(INT32), T(FIXED64), T(FIXED32), |
||||
T(BOOL), T(UINT32), T(SFIXED32), T(SFIXED64), T(SINT32), T(SINT64), |
||||
OP(STARTMSG), OP(ENDMSG), OP(STARTSEQ), OP(ENDSEQ), OP(STARTSUBMSG), |
||||
OP(ENDSUBMSG), OP(STARTSTR), OP(STRING), OP(ENDSTR), OP(CALL), |
||||
OP(PUSHLENDELIM), OP(PUSHTAGDELIM), OP(SETDELIM), OP(CHECKDELIM), |
||||
OP(BRANCH), OP(TAG1), OP(TAG2), OP(TAGN), OP(SETDISPATCH), OP(POP), |
||||
OP(SETGROUPNUM), OP(SETBIGGROUPNUM), OP(HALT), |
||||
}; |
||||
return op > OP_HALT ? names[0] : names[op]; |
||||
#undef OP |
||||
#undef T |
||||
} |
||||
|
||||
#endif |
||||
|
||||
#ifdef UPB_DUMP_BYTECODE |
||||
|
||||
static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) { |
||||
|
||||
uint32_t *begin = p; |
||||
|
||||
while (p < end) { |
||||
fprintf(f, "%p %8tx", p, p - begin); |
||||
uint32_t instr = *p++; |
||||
uint8_t op = getop(instr); |
||||
fprintf(f, " %s", upb_pbdecoder_getopname(op)); |
||||
switch ((opcode)op) { |
||||
case OP_SETDISPATCH: { |
||||
const upb_inttable *dispatch; |
||||
memcpy(&dispatch, p, sizeof(void*)); |
||||
p += ptr_words; |
||||
const upb_pbdecodermethod *method = |
||||
(void *)((char *)dispatch - |
||||
offsetof(upb_pbdecodermethod, dispatch)); |
||||
fprintf(f, " %s", upb_msgdef_fullname(method->msg)); |
||||
break; |
||||
} |
||||
case OP_STARTMSG: |
||||
case OP_ENDMSG: |
||||
case OP_PUSHLENDELIM: |
||||
case OP_PUSHTAGDELIM: |
||||
case OP_POP: |
||||
case OP_SETDELIM: |
||||
case OP_HALT: |
||||
break; |
||||
case OP_PARSE_DOUBLE: |
||||
case OP_PARSE_FLOAT: |
||||
case OP_PARSE_INT64: |
||||
case OP_PARSE_UINT64: |
||||
case OP_PARSE_INT32: |
||||
case OP_PARSE_FIXED64: |
||||
case OP_PARSE_FIXED32: |
||||
case OP_PARSE_BOOL: |
||||
case OP_PARSE_UINT32: |
||||
case OP_PARSE_SFIXED32: |
||||
case OP_PARSE_SFIXED64: |
||||
case OP_PARSE_SINT32: |
||||
case OP_PARSE_SINT64: |
||||
case OP_STARTSEQ: |
||||
case OP_ENDSEQ: |
||||
case OP_STARTSUBMSG: |
||||
case OP_ENDSUBMSG: |
||||
case OP_STARTSTR: |
||||
case OP_STRING: |
||||
case OP_ENDSTR: |
||||
case OP_SETGROUPNUM: |
||||
fprintf(f, " %d", instr >> 8); |
||||
break; |
||||
case OP_SETBIGGROUPNUM: |
||||
fprintf(f, " %d", *p++); |
||||
break; |
||||
case OP_CHECKDELIM: |
||||
case OP_CALL: |
||||
case OP_BRANCH: |
||||
fprintf(f, " =>0x%tx", p + getofs(instr) - begin); |
||||
break; |
||||
case OP_TAG1: |
||||
case OP_TAG2: { |
||||
fprintf(f, " tag:0x%x", instr >> 16); |
||||
if (getofs(instr)) { |
||||
fprintf(f, " =>0x%tx", p + getofs(instr) - begin); |
||||
} |
||||
break; |
||||
} |
||||
case OP_TAGN: { |
||||
uint64_t tag = *p++; |
||||
tag |= (uint64_t)*p++ << 32; |
||||
fprintf(f, " tag:0x%llx", (long long)tag); |
||||
fprintf(f, " n:%d", instr >> 16); |
||||
if (getofs(instr)) { |
||||
fprintf(f, " =>0x%tx", p + getofs(instr) - begin); |
||||
} |
||||
break; |
||||
} |
||||
} |
||||
fputs("\n", f); |
||||
} |
||||
} |
||||
|
||||
#endif |
||||
|
||||
// Builds the tag for field "f" with the given wire type (field number in the
// bits above the 3-bit wire type) and returns it as encoded by
// upb_vencode32().
static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
  const uint32_t fieldnum = upb_fielddef_number(f);
  const uint32_t tag = (fieldnum << 3) | wire_type;
  const uint64_t encoded = upb_vencode32(tag);
  // No tag should be greater than 5 bytes.
  assert(encoded <= 0xffffffffff);
  return encoded;
}
||||
|
||||
// Emits the tag-check instruction for field "f" / "wire_type" with jump label
// "dest", choosing OP_TAG1, OP_TAG2 or OP_TAGN by the size of the encoded tag.
static void putchecktag(compiler *c, const upb_fielddef *f,
                        int wire_type, int dest) {
  const uint64_t tag = get_encoded_tag(f, wire_type);
  opcode op;

  switch (upb_value_size(tag)) {
    case 1:  op = OP_TAG1; break;
    case 2:  op = OP_TAG2; break;
    default: op = OP_TAGN; break;
  }
  putop(c, op, dest, tag);
}
||||
|
||||
// Looks up the handler selector for field "f" and handler type "type".
// The lookup is expected to succeed (checked via UPB_ASSERT_VAR).
static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
  upb_selector_t sel;
  bool ok = upb_handlers_getselector(f, type, &sel);
  UPB_ASSERT_VAR(ok, ok);
  return sel;
}
||||
|
||||
// Marks the current bytecode position as the dispatch target for this message,
|
||||
// field, and wire type.
|
||||
//
|
||||
static void dispatchtarget(compiler *c, upb_pbdecodermethod *method, |
||||
const upb_fielddef *f, int wire_type) { |
||||
// Offset is relative to msg base.
|
||||
uint64_t ofs = pcofs(c) - method->base.ofs; |
||||
uint32_t fn = upb_fielddef_number(f); |
||||
upb_inttable *d = &method->dispatch; |
||||
upb_value v; |
||||
if (upb_inttable_remove(d, fn, &v)) { |
||||
// TODO: prioritize based on packed setting in .proto file.
|
||||
uint64_t oldval = upb_value_getuint64(v); |
||||
assert(((oldval >> 8) & 0xff) == 0); // wt2 should not be set yet.
|
||||
upb_inttable_insert(d, fn, upb_value_uint64(oldval | (wire_type << 8))); |
||||
upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs)); |
||||
} else { |
||||
upb_inttable_insert(d, fn, upb_value_uint64((ofs << 16) | wire_type)); |
||||
} |
||||
} |
||||
|
||||
// Emits the frame push for entering submessage field "f": a length-delimited
// push for MESSAGE fields; otherwise a tag-delimited push followed by the
// group (field) number, using the two-word form when the number does not fit
// in 24 bits.
static void putpush(compiler *c, const upb_fielddef *f) {
  if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
    putop(c, OP_PUSHLENDELIM);
    return;
  }

  uint32_t groupnum = upb_fielddef_number(f);
  putop(c, OP_PUSHTAGDELIM);
  const bool needs_big = groupnum >= 1 << 24;
  putop(c, needs_big ? OP_SETBIGGROUPNUM : OP_SETGROUPNUM, groupnum);
}
||||
|
||||
static upb_pbdecodermethod *find_submethod(const compiler *c, |
||||
const upb_pbdecodermethod *method, |
||||
const upb_fielddef *f) { |
||||
const void *key = method->dest_handlers ? |
||||
(const void*)upb_handlers_getsubhandlers(method->dest_handlers, f) : |
||||
(const void*)upb_downcast_msgdef(upb_fielddef_subdef(f)); |
||||
upb_value v; |
||||
bool ok = upb_inttable_lookupptr(&c->plan->methods, key, &v); |
||||
UPB_ASSERT_VAR(ok, ok); |
||||
return upb_value_getptr(v); |
||||
} |
||||
|
||||
// Adds bytecode for parsing the given message to the given decoderplan,
|
||||
// while adding all dispatch targets to this message's dispatch table.
|
||||
static void compile_method(compiler *c, upb_pbdecodermethod *method) { |
||||
assert(method); |
||||
|
||||
// Symbolic names for our local labels.
|
||||
const int LABEL_LOOPSTART = 1; // Top of a repeated field loop.
|
||||
const int LABEL_LOOPBREAK = 2; // To jump out of a repeated loop
|
||||
const int LABEL_FIELD = 3; // Jump backward to find the most recent field.
|
||||
const int LABEL_ENDMSG = 4; // To reach the OP_ENDMSG instr for this msg.
|
||||
|
||||
// Index is descriptor type.
|
||||
static const uint8_t native_wire_types[] = { |
||||
UPB_WIRE_TYPE_END_GROUP, // ENDGROUP
|
||||
UPB_WIRE_TYPE_64BIT, // DOUBLE
|
||||
UPB_WIRE_TYPE_32BIT, // FLOAT
|
||||
UPB_WIRE_TYPE_VARINT, // INT64
|
||||
UPB_WIRE_TYPE_VARINT, // UINT64
|
||||
UPB_WIRE_TYPE_VARINT, // INT32
|
||||
UPB_WIRE_TYPE_64BIT, // FIXED64
|
||||
UPB_WIRE_TYPE_32BIT, // FIXED32
|
||||
UPB_WIRE_TYPE_VARINT, // BOOL
|
||||
UPB_WIRE_TYPE_DELIMITED, // STRING
|
||||
UPB_WIRE_TYPE_START_GROUP, // GROUP
|
||||
UPB_WIRE_TYPE_DELIMITED, // MESSAGE
|
||||
UPB_WIRE_TYPE_DELIMITED, // BYTES
|
||||
UPB_WIRE_TYPE_VARINT, // UINT32
|
||||
UPB_WIRE_TYPE_VARINT, // ENUM
|
||||
UPB_WIRE_TYPE_32BIT, // SFIXED32
|
||||
UPB_WIRE_TYPE_64BIT, // SFIXED64
|
||||
UPB_WIRE_TYPE_VARINT, // SINT32
|
||||
UPB_WIRE_TYPE_VARINT, // SINT64
|
||||
}; |
||||
|
||||
// Clear all entries in the dispatch table.
|
||||
upb_inttable_uninit(&method->dispatch); |
||||
upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64); |
||||
|
||||
method->base.ofs = pcofs(c); |
||||
putop(c, OP_SETDISPATCH, &method->dispatch); |
||||
putop(c, OP_STARTMSG); |
||||
label(c, LABEL_FIELD); |
||||
upb_msg_iter i; |
||||
for(upb_msg_begin(&i, method->msg); !upb_msg_done(&i); upb_msg_next(&i)) { |
||||
const upb_fielddef *f = upb_msg_iter_field(&i); |
||||
upb_descriptortype_t type = upb_fielddef_descriptortype(f); |
||||
|
||||
// From a decoding perspective, ENUM is the same as INT32.
|
||||
if (type == UPB_DESCRIPTOR_TYPE_ENUM) |
||||
type = UPB_DESCRIPTOR_TYPE_INT32; |
||||
|
||||
label(c, LABEL_FIELD); |
||||
|
||||
switch (upb_fielddef_type(f)) { |
||||
case UPB_TYPE_MESSAGE: { |
||||
const upb_pbdecodermethod *sub_m = find_submethod(c, method, f); |
||||
int wire_type = (type == UPB_DESCRIPTOR_TYPE_MESSAGE) ? |
||||
UPB_WIRE_TYPE_DELIMITED : UPB_WIRE_TYPE_START_GROUP; |
||||
if (upb_fielddef_isseq(f)) { |
||||
putop(c, OP_CHECKDELIM, LABEL_ENDMSG); |
||||
putchecktag(c, f, wire_type, LABEL_DISPATCH); |
||||
dispatchtarget(c, method, f, wire_type); |
||||
putop(c, OP_PUSHTAGDELIM); |
||||
putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); |
||||
label(c, LABEL_LOOPSTART); |
||||
putpush(c, f); |
||||
putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG)); |
||||
putop(c, OP_CALL, sub_m); |
||||
putop(c, OP_POP); |
||||
putop(c, OP_ENDSUBMSG, getsel(f, UPB_HANDLER_ENDSUBMSG)); |
||||
if (wire_type == UPB_WIRE_TYPE_DELIMITED) { |
||||
putop(c, OP_SETDELIM); |
||||
} |
||||
putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); |
||||
putchecktag(c, f, wire_type, LABEL_LOOPBREAK); |
||||
putop(c, OP_BRANCH, -LABEL_LOOPSTART); |
||||
label(c, LABEL_LOOPBREAK); |
||||
putop(c, OP_POP); |
||||
putop(c, OP_ENDSEQ, getsel(f, UPB_HANDLER_ENDSEQ)); |
||||
} else { |
||||
putop(c, OP_CHECKDELIM, LABEL_ENDMSG); |
||||
putchecktag(c, f, wire_type, LABEL_DISPATCH); |
||||
dispatchtarget(c, method, f, wire_type); |
||||
putpush(c, f); |
||||
putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG)); |
||||
putop(c, OP_CALL, sub_m); |
||||
putop(c, OP_POP); |
||||
putop(c, OP_ENDSUBMSG, getsel(f, UPB_HANDLER_ENDSUBMSG)); |
||||
if (wire_type == UPB_WIRE_TYPE_DELIMITED) { |
||||
putop(c, OP_SETDELIM); |
||||
} |
||||
} |
||||
break; |
||||
} |
||||
case UPB_TYPE_STRING: |
||||
case UPB_TYPE_BYTES: |
||||
if (upb_fielddef_isseq(f)) { |
||||
putop(c, OP_CHECKDELIM, LABEL_ENDMSG); |
||||
putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); |
||||
dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); |
||||
putop(c, OP_PUSHTAGDELIM); |
||||
putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); |
||||
label(c, LABEL_LOOPSTART); |
||||
putop(c, OP_PUSHLENDELIM); |
||||
putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR)); |
||||
putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING)); |
||||
putop(c, OP_POP); |
||||
putop(c, OP_ENDSTR, getsel(f, UPB_HANDLER_ENDSTR)); |
||||
putop(c, OP_SETDELIM); |
||||
putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); |
||||
putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK); |
||||
putop(c, OP_BRANCH, -LABEL_LOOPSTART); |
||||
label(c, LABEL_LOOPBREAK); |
||||
putop(c, OP_POP); |
||||
putop(c, OP_ENDSEQ, getsel(f, UPB_HANDLER_ENDSEQ)); |
||||
} else { |
||||
putop(c, OP_CHECKDELIM, LABEL_ENDMSG); |
||||
putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); |
||||
dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); |
||||
putop(c, OP_PUSHLENDELIM); |
||||
putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR)); |
||||
putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING)); |
||||
putop(c, OP_POP); |
||||
putop(c, OP_ENDSTR, getsel(f, UPB_HANDLER_ENDSTR)); |
||||
putop(c, OP_SETDELIM); |
||||
} |
||||
break; |
||||
default: { |
||||
opcode parse_type = (opcode)type; |
||||
assert(parse_type >= 0 && parse_type <= OP_MAX); |
||||
upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); |
||||
int wire_type = native_wire_types[upb_fielddef_descriptortype(f)]; |
||||
if (upb_fielddef_isseq(f)) { |
||||
putop(c, OP_CHECKDELIM, LABEL_ENDMSG); |
||||
putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); |
||||
dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); |
||||
putop(c, OP_PUSHLENDELIM); |
||||
putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); // Packed
|
||||
label(c, LABEL_LOOPSTART); |
||||
putop(c, parse_type, sel); |
||||
putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); |
||||
putop(c, OP_BRANCH, -LABEL_LOOPSTART); |
||||
dispatchtarget(c, method, f, wire_type); |
||||
putop(c, OP_PUSHTAGDELIM); |
||||
putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); // Non-packed
|
||||
label(c, LABEL_LOOPSTART); |
||||
putop(c, parse_type, sel); |
||||
putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); |
||||
putchecktag(c, f, wire_type, LABEL_LOOPBREAK); |
||||
putop(c, OP_BRANCH, -LABEL_LOOPSTART); |
||||
label(c, LABEL_LOOPBREAK); |
||||
putop(c, OP_POP); // Packed and non-packed join.
|
||||
putop(c, OP_ENDSEQ, getsel(f, UPB_HANDLER_ENDSEQ)); |
||||
putop(c, OP_SETDELIM); // Could remove for non-packed by dup ENDSEQ.
|
||||
} else { |
||||
putop(c, OP_CHECKDELIM, LABEL_ENDMSG); |
||||
putchecktag(c, f, wire_type, LABEL_DISPATCH); |
||||
dispatchtarget(c, method, f, wire_type); |
||||
putop(c, parse_type, sel); |
||||
} |
||||
} |
||||
} |
||||
} |
||||
// For now we just loop back to the last field of the message (or if none,
|
||||
// the DISPATCH opcode for the message.
|
||||
putop(c, OP_BRANCH, -LABEL_FIELD); |
||||
label(c, LABEL_ENDMSG); |
||||
putop(c, OP_ENDMSG); |
||||
|
||||
upb_inttable_compact(&method->dispatch); |
||||
} |
||||
|
||||
// Populate "methods" with new upb_pbdecodermethod objects reachable from "md".
|
||||
// "h" can be NULL, in which case the methods will not be statically bound to
|
||||
// destination handlers.
|
||||
//
|
||||
// Returns the method for this msgdef/handlers.
|
||||
//
|
||||
// Note that there is a deep difference between keying the method table on
|
||||
// upb_msgdef and keying it on upb_handlers. Since upb_msgdef : upb_handlers
|
||||
// can be 1:many, binding a handlers statically can result in *more* methods
|
||||
// being generated than if the methods are dynamically-bound.
|
||||
//
|
||||
// On the other hand, if/when the optimization mentioned below is implemented,
|
||||
// binding to a upb_handlers can result in *fewer* methods being generated if
|
||||
// many of the submessages have no handlers bound to them.
|
||||
static upb_pbdecodermethod *find_methods(compiler *c, |
||||
const upb_msgdef *md, |
||||
const upb_handlers *h) { |
||||
const void *key = h ? (const void*)h : (const void*)md; |
||||
upb_value v; |
||||
if (upb_inttable_lookupptr(&c->plan->methods, key, &v)) |
||||
return upb_value_getptr(v); |
||||
upb_pbdecodermethod *method = newmethod(md, h); |
||||
// Takes ownership of method.
|
||||
upb_inttable_insertptr(&c->plan->methods, key, upb_value_ptr(method)); |
||||
|
||||
upb_msg_iter i; |
||||
for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) { |
||||
const upb_fielddef *f = upb_msg_iter_field(&i); |
||||
if (upb_fielddef_type(f) != UPB_TYPE_MESSAGE) |
||||
continue; |
||||
const upb_handlers *sub_h = h ? upb_handlers_getsubhandlers(h, f) : NULL; |
||||
|
||||
if (h && !sub_h && |
||||
upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) { |
||||
// OPT: We could optimize away the sub-method, but would have to make sure
|
||||
// this field is compiled as a string instead of a submessage.
|
||||
} |
||||
|
||||
find_methods(c, upb_downcast_msgdef(upb_fielddef_subdef(f)), sub_h); |
||||
} |
||||
|
||||
return method; |
||||
} |
||||
|
||||
// (Re-)compile bytecode for all messages in "msgs", ensuring that the code
|
||||
// for "md" is emitted first. Overwrites any existing bytecode in "c".
|
||||
static void compile_methods(compiler *c) { |
||||
// Start over at the beginning of the bytecode.
|
||||
c->pc = c->plan->code; |
||||
compile_method(c, c->plan->topmethod); |
||||
|
||||
upb_inttable_iter i; |
||||
upb_inttable_begin(&i, &c->plan->methods); |
||||
for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { |
||||
upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i)); |
||||
if (method != c->plan->topmethod) { |
||||
compile_method(c, method); |
||||
} |
||||
} |
||||
} |
||||
|
||||
|
||||
/* JIT setup. ******************************************************************/ |
||||
|
||||
#ifdef UPB_USE_JIT_X64 |
||||
|
||||
// JIT build: installs decoder entry points on "h".  Without "allowjit" the
// bytecode handlers are used.  With it, the bytecode is first compiled to
// machine code and the generated code is installed as the string handler;
// freeplan is attached to the start-string handler in that case.
static void sethandlers(upb_pbdecoderplan *p, upb_handlers *h, bool allowjit) {
  p->jit_code = NULL;

  if (!allowjit) {
    set_bytecode_handlers(p, h);
    return;
  }

  upb_pbdecoder_jit(p);  // Compile byte-code into machine code.
  upb_handlers_setstartstr(
      h, UPB_BYTESTREAM_BYTES, upb_pbdecoder_start, p, freeplan);
  upb_handlers_setstring(h, UPB_BYTESTREAM_BYTES, p->jit_code, NULL, NULL);
  upb_handlers_setendstr(h, UPB_BYTESTREAM_BYTES, upb_pbdecoder_end, p, NULL);
}
||||
|
||||
// JIT build: tells whether methods should be bound dynamically (i.e. not tied
// to the destination handlers).  For the moment, JIT handlers always bind
// statically, but bytecode handlers never do.
static bool bind_dynamic(bool allowjit) {
  const bool using_bytecode = !allowjit;
  return using_bytecode;
}
||||
|
||||
#else // UPB_USE_JIT_X64
|
||||
|
||||
// Non-JIT build: "allowjit" is ignored and the bytecode handlers are always
// installed.
static void sethandlers(upb_pbdecoderplan *p, upb_handlers *h, bool allowjit) {
  UPB_UNUSED(allowjit);  // No JIT compiled in.
  set_bytecode_handlers(p, h);
}
||||
|
||||
// Non-JIT build: methods are always bound dynamically, because bytecode
// handlers never bind statically.  "allowjit" is ignored.
static bool bind_dynamic(bool allowjit) {
  // Mark the parameter as intentionally unused, matching sethandlers().
  UPB_UNUSED(allowjit);
  return true;
}
||||
|
||||
#endif // UPB_USE_JIT_X64
|
||||
|
||||
|
||||
/* Public interface ***********************************************************/ |
||||
|
||||
bool upb_pbdecoder_isdecoder(const upb_handlers *h) { |
||||
return getdecoderplan(h) != NULL; |
||||
} |
||||
|
||||
// Returns true iff the plan carries JIT-generated code.  Always false when
// the JIT is not compiled in.
bool upb_pbdecoderplan_hasjitcode(const upb_pbdecoderplan *p) {
#ifdef UPB_USE_JIT_X64
  const bool has_code = (p->jit_code != NULL);
  return has_code;
#else
  UPB_UNUSED(p);
  return false;
#endif
}
||||
|
||||
bool upb_pbdecoder_hasjitcode(const upb_handlers *h) { |
||||
const upb_pbdecoderplan *p = getdecoderplan(h); |
||||
if (!p) return false; |
||||
return upb_pbdecoderplan_hasjitcode(p); |
||||
} |
||||
|
||||
uint32_t *upb_pbdecoderplan_codebase(const upb_pbdecoderplan *p) { |
||||
return p->code; |
||||
} |
||||
|
||||
// Returns the plan's JIT-generated string handler.  Must not be called when
// the JIT is not compiled in: that case asserts in debug builds and returns
// NULL otherwise.
upb_string_handler *upb_pbdecoderplan_jitcode(const upb_pbdecoderplan *p) {
#ifdef UPB_USE_JIT_X64
  return p->jit_code;
#else
  // Mark the parameter as intentionally unused, matching
  // upb_pbdecoderplan_hasjitcode().
  UPB_UNUSED(p);
  assert(false);
  return NULL;
#endif
}
||||
|
||||
const upb_handlers *upb_pbdecoder_getdesthandlers(const upb_handlers *h) { |
||||
const upb_pbdecoderplan *p = getdecoderplan(h); |
||||
if (!p) return NULL; |
||||
return p->topmethod->dest_handlers; |
||||
} |
||||
|
||||
const upb_handlers *upb_pbdecoder_gethandlers(const upb_handlers *dest, |
||||
bool allowjit, |
||||
const void *owner) { |
||||
UPB_UNUSED(allowjit); |
||||
assert(upb_handlers_isfrozen(dest)); |
||||
const upb_msgdef *md = upb_handlers_msgdef(dest); |
||||
|
||||
upb_pbdecoderplan *p = newplan(); |
||||
compiler *c = newcompiler(p); |
||||
|
||||
if (bind_dynamic(allowjit)) { |
||||
// If binding dynamically, remove the reference against destination
|
||||
// handlers.
|
||||
dest = NULL; |
||||
} |
||||
|
||||
p->topmethod = find_methods(c, md, dest); |
||||
|
||||
// We compile in two passes:
|
||||
// 1. all messages are assigned relative offsets from the beginning of the
|
||||
// bytecode (saved in method->base).
|
||||
// 2. forwards OP_CALL instructions can be correctly linked since message
|
||||
// offsets have been previously assigned.
|
||||
//
|
||||
// Could avoid the second pass by linking OP_CALL instructions somehow.
|
||||
compile_methods(c); |
||||
compile_methods(c); |
||||
p->code_end = c->pc; |
||||
|
||||
#ifdef UPB_DUMP_BYTECODE |
||||
FILE *f = fopen("/tmp/upb-bytecode", "wb"); |
||||
assert(f); |
||||
dumpbc(p->code, p->code_end, stderr); |
||||
dumpbc(p->code, p->code_end, f); |
||||
fclose(f); |
||||
#endif |
||||
|
||||
upb_handlers *h = upb_handlers_new( |
||||
UPB_BYTESTREAM, &upb_pbdecoder_frametype, owner); |
||||
sethandlers(p, h, allowjit); |
||||
|
||||
freecompiler(c); |
||||
|
||||
return h; |
||||
} |
@ -0,0 +1,368 @@ |
||||
/*
|
||||
* upb - a minimalist implementation of protocol buffers. |
||||
* |
||||
* Copyright (c) 2013 Google Inc. See LICENSE for details. |
||||
* Author: Josh Haberman <jhaberman@gmail.com> |
||||
* |
||||
* Driver code for the x64 JIT compiler. |
||||
*/ |
||||
|
||||
#include <dlfcn.h> |
||||
#include <stdio.h> |
||||
#include <sys/mman.h> |
||||
#include "upb/pb/decoder.h" |
||||
#include "upb/pb/decoder.int.h" |
||||
#include "upb/pb/varint.int.h" |
||||
#include "upb/shim/shim.h" |
||||
|
||||
// These defines are necessary for DynASM codegen.
|
||||
// See dynasm/dasm_proto.h for more info.
|
||||
#define Dst_DECL jitcompiler *jc |
||||
#define Dst_REF (jc->dynasm) |
||||
#define Dst (jc) |
||||
|
||||
// In debug mode, make DynASM do internal checks (must be defined before any
|
||||
// dasm header is included.
|
||||
#ifndef NDEBUG |
||||
#define DASM_CHECKS |
||||
#endif |
||||
|
||||
#ifndef MAP_ANONYMOUS |
||||
#define MAP_ANONYMOUS MAP_ANON |
||||
#endif |
||||
|
||||
#define DECODE_EOF -3 |
||||
|
||||
typedef struct { |
||||
upb_pbdecoderplan *plan; |
||||
uint32_t *pc; |
||||
|
||||
// This pointer is allocated by dasm_init() and freed by dasm_free().
|
||||
struct dasm_State *dynasm; |
||||
|
||||
// Maps bytecode pc location -> pclabel.
|
||||
upb_inttable pclabels; |
||||
upb_inttable pcdefined; |
||||
|
||||
// For marking labels that should go into the generated code.
|
||||
// Maps pclabel -> char* label (string is owned by the table).
|
||||
upb_inttable asmlabels; |
||||
|
||||
// For checking that two asmlabels aren't defined for the same byte.
|
||||
int lastlabelofs; |
||||
|
||||
// The total number of pclabels currently defined.
|
||||
uint32_t pclabel_count; |
||||
|
||||
// Used by DynASM to store globals.
|
||||
void **globals; |
||||
|
||||
bool usefp; |
||||
bool chkret; |
||||
} jitcompiler; |
||||
|
||||
// Functions called by codegen.
|
||||
static int pclabel(jitcompiler *jc, const void *here); |
||||
static int define_pclabel(jitcompiler *jc, const void *here); |
||||
static void asmlabel(jitcompiler *jc, const char *fmt, ...); |
||||
|
||||
#include "dynasm/dasm_proto.h" |
||||
#include "dynasm/dasm_x86.h" |
||||
#include "upb/pb/compile_decoder_x64.h" |
||||
|
||||
// Allocates a jitcompiler for "plan" and initializes its lookup tables and
// DynASM state.  The result must be released with freejitcompiler().
static jitcompiler *newjitcompiler(upb_pbdecoderplan *plan) {
  jitcompiler *jc = malloc(sizeof(*jc));

  // Plain scalar state.
  jc->plan = plan;
  jc->usefp = false;
  jc->chkret = false;
  jc->lastlabelofs = -1;
  jc->pclabel_count = 0;

  // Label bookkeeping tables.
  upb_inttable_init(&jc->pclabels, UPB_CTYPE_UINT32);
  upb_inttable_init(&jc->pcdefined, UPB_CTYPE_BOOL);
  upb_inttable_init(&jc->asmlabels, UPB_CTYPE_PTR);

  // DynASM needs somewhere to store its globals before setup.
  jc->globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*jc->globals));

  dasm_init(jc, 1);
  dasm_setupglobal(jc, jc->globals, UPB_JIT_GLOBAL__MAX);
  dasm_setup(jc, upb_jit_actionlist);

  return jc;
}
||||
|
||||
// Releases everything owned by "jc", including "jc" itself.
static void freejitcompiler(jitcompiler *jc) {
  // The asmlabels table owns its label strings, so free each string before
  // tearing down the table.
  upb_inttable_iter it;
  upb_inttable_begin(&it, &jc->asmlabels);
  while (!upb_inttable_done(&it)) {
    free(upb_value_getptr(upb_inttable_iter_value(&it)));
    upb_inttable_next(&it);
  }

  upb_inttable_uninit(&jc->asmlabels);
  upb_inttable_uninit(&jc->pclabels);
  upb_inttable_uninit(&jc->pcdefined);

  dasm_free(jc);
  free(jc->globals);
  free(jc);
}
||||
|
||||
// Returns a pclabel associated with the given arbitrary pointer.
|
||||
static int pclabel(jitcompiler *jc, const void *here) { |
||||
upb_value v; |
||||
bool found = upb_inttable_lookupptr(&jc->pclabels, here, &v); |
||||
if (!found) { |
||||
upb_value_setuint32(&v, jc->pclabel_count++); |
||||
dasm_growpc(jc, jc->pclabel_count); |
||||
upb_inttable_insertptr(&jc->pclabels, here, v); |
||||
} |
||||
return upb_value_getuint32(v); |
||||
} |
||||
|
||||
// Defines a pclabel associated with the given arbitrary pointer.
|
||||
// May only be called once (to avoid redefining the pclabel).
|
||||
static int define_pclabel(jitcompiler *jc, const void *here) { |
||||
// Will assert-fail if it already exists.
|
||||
upb_inttable_insertptr(&jc->pcdefined, here, upb_value_bool(true)); |
||||
return pclabel(jc, here); |
||||
} |
||||
|
||||
static void upb_reg_jit_gdb(jitcompiler *jc); |
||||
|
||||
// Given a pcofs relative to method, returns the machine code offset for it
|
||||
// (relative to the beginning of the machine code).
|
||||
int nativeofs(jitcompiler *jc, const upb_pbdecodermethod *method, int pcofs) { |
||||
void *target = jc->plan->code + method->base.ofs + pcofs; |
||||
return dasm_getpclabel(jc, pclabel(jc, target)); |
||||
} |
||||
|
||||
// Given a pcofs relative to this method's base, returns a machine code offset
|
||||
// relative to pclabel(dispatch->array) (which is used in jitdispatch as the
|
||||
// machine code base for dispatch table lookups).
|
||||
uint32_t dispatchofs(jitcompiler *jc, const upb_pbdecodermethod *method, |
||||
int pcofs) { |
||||
int ofs1 = dasm_getpclabel(jc, pclabel(jc, method->dispatch.array)); |
||||
int ofs2 = nativeofs(jc, method, pcofs); |
||||
assert(ofs1 > 0); |
||||
assert(ofs2 > 0); |
||||
int ret = ofs2 - ofs1; |
||||
assert(ret > 0); |
||||
return ret; |
||||
} |
||||
|
||||
// Rewrites the dispatch tables into machine code offsets.
|
||||
static void patchdispatch(jitcompiler *jc) { |
||||
upb_inttable_iter i; |
||||
upb_inttable_begin(&i, &jc->plan->methods); |
||||
for (; !upb_inttable_done(&i); upb_inttable_next(&i)) { |
||||
upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i)); |
||||
upb_inttable *dispatch = &method->dispatch; |
||||
upb_inttable_iter i2; |
||||
upb_inttable_begin(&i2, dispatch); |
||||
for (; !upb_inttable_done(&i2); upb_inttable_next(&i2)) { |
||||
uintptr_t key = upb_inttable_iter_key(&i2); |
||||
if (key == 0) continue; |
||||
uint64_t val = upb_value_getuint64(upb_inttable_iter_value(&i2)); |
||||
uint64_t newval; |
||||
if (key <= UPB_MAX_FIELDNUMBER) { |
||||
// Primary slot.
|
||||
uint64_t oldofs = val >> 16; |
||||
uint64_t newofs = dispatchofs(jc, method, oldofs); |
||||
newval = (val & 0xffff) | (newofs << 16); |
||||
assert((int64_t)newval > 0); |
||||
} else { |
||||
// Secondary slot. Since we have 64 bits for the value, we use an
|
||||
// absolute offset.
|
||||
newval = (uint64_t)(jc->plan->jit_code + nativeofs(jc, method, val)); |
||||
} |
||||
bool ok = upb_inttable_replace(dispatch, key, upb_value_uint64(newval)); |
||||
UPB_ASSERT_VAR(ok, ok); |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Define for JIT debugging.
|
||||
#ifdef UPB_JIT_LOAD_SO |
||||
static void load_so(jitcompiler *jc) { |
||||
// Dump to a .so file in /tmp and load that, so all the tooling works right
|
||||
// (for example, debuggers and profilers will see symbol names for the JIT-ted
|
||||
// code). This is the same goal of the GDB JIT code below, but the GDB JIT
|
||||
// interface is only used/understood by GDB. Hopefully a standard will
|
||||
// develop for registering JIT-ted code that all tools will recognize,
|
||||
// rendering this obsolete.
|
||||
//
|
||||
// Requires that gcc is available from the command-line.
|
||||
|
||||
// Convert all asm labels from pclabel offsets to machine code offsets.
|
||||
upb_inttable_iter i; |
||||
upb_inttable mclabels; |
||||
upb_inttable_init(&mclabels, UPB_CTYPE_PTR); |
||||
upb_inttable_begin(&i, &jc->asmlabels); |
||||
for (; !upb_inttable_done(&i); upb_inttable_next(&i)) { |
||||
upb_inttable_insert(&mclabels, |
||||
dasm_getpclabel(jc, upb_inttable_iter_key(&i)), |
||||
upb_inttable_iter_value(&i)); |
||||
} |
||||
|
||||
FILE *f = fopen("/tmp/upb-jit-code.s", "w"); |
||||
if (f) { |
||||
fputs(" .text\n\n", f); |
||||
size_t linelen = 0; |
||||
for (size_t i = 0; i < jc->plan->jit_size; i++) { |
||||
upb_value v; |
||||
if (upb_inttable_lookup(&mclabels, i, &v)) { |
||||
const char *label = upb_value_getptr(v); |
||||
// "X." makes our JIT syms recognizable as such, which we build into
|
||||
// other tooling.
|
||||
fprintf(f, "\n\nX.%s:\n", label); |
||||
fprintf(f, " .globl X.%s", label); |
||||
linelen = 1000; |
||||
} |
||||
if (linelen >= 77) { |
||||
linelen = fprintf(f, "\n .byte %u", jit_code[i]); |
||||
} else { |
||||
linelen += fprintf(f, ",%u", jit_code[i]); |
||||
} |
||||
} |
||||
fputs("\n", f); |
||||
fclose(f); |
||||
} else { |
||||
fprintf(stderr, "Couldn't open /tmp/upb-jit-code.s for writing/\n"); |
||||
} |
||||
|
||||
// TODO: racy
|
||||
if (system("gcc -shared -o /tmp/upb-jit-code.so /tmp/upb-jit-code.s") != 0) { |
||||
abort(); |
||||
} |
||||
|
||||
jc->dl = dlopen("/tmp/upb-jit-code.so", RTLD_LAZY); |
||||
if (!jc->dl) { |
||||
fprintf(stderr, "Couldn't dlopen(): %s\n", dlerror()); |
||||
abort(); |
||||
} |
||||
|
||||
munmap(jit_code, jc->plan->jit_size); |
||||
jit_code = dlsym(jc->dl, "X.enterjit"); |
||||
if (!jit_code) { |
||||
fprintf(stderr, "Couldn't find enterjit sym\n"); |
||||
abort(); |
||||
} |
||||
|
||||
upb_inttable_uninit(&mclabels); |
||||
} |
||||
#endif |
||||
|
||||
void upb_pbdecoder_jit(upb_pbdecoderplan *plan) { |
||||
plan->debug_info = NULL; |
||||
plan->dl = NULL; |
||||
|
||||
jitcompiler *jc = newjitcompiler(plan); |
||||
emit_static_asm(jc); |
||||
jitbytecode(jc); |
||||
|
||||
int dasm_status = dasm_link(jc, &jc->plan->jit_size); |
||||
if (dasm_status != DASM_S_OK) { |
||||
fprintf(stderr, "DynASM error; returned status: 0x%08x\n", dasm_status); |
||||
abort(); |
||||
} |
||||
|
||||
char *jit_code = mmap(NULL, jc->plan->jit_size, PROT_READ | PROT_WRITE, |
||||
MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); |
||||
dasm_encode(jc, jit_code); |
||||
mprotect(jit_code, jc->plan->jit_size, PROT_EXEC | PROT_READ); |
||||
upb_reg_jit_gdb(jc); |
||||
|
||||
#ifdef UPB_JIT_LOAD_SO |
||||
load_so(jc); |
||||
#endif |
||||
|
||||
jc->plan->jit_code = (upb_string_handler *)jit_code; |
||||
patchdispatch(jc); |
||||
freejitcompiler(jc); |
||||
} |
||||
|
||||
// Releases the JIT code and debug info owned by "plan".  Does nothing if the
// plan has no JIT code.
void upb_pbdecoder_freejit(upb_pbdecoderplan *plan) {
  if (plan->jit_code) {
    // The code is either an anonymous mapping or lives in a dlopen()ed
    // shared object, in which case plan->dl is the handle.
    if (!plan->dl) {
      munmap(plan->jit_code, plan->jit_size);
    } else {
      dlclose(plan->dl);
    }
    free(plan->debug_info);
    // TODO: unregister GDB JIT interface.
  }
}
||||
|
||||
// To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code
|
||||
// at runtime. GDB 7.x+ has defined an interface for doing this, and these
|
||||
// structure/function definitions are copied out of gdb/jit.h
|
||||
//
|
||||
// We need to give GDB an ELF file at runtime describing the symbols we have
|
||||
// generated. To avoid implementing the ELF format, we generate an ELF file
|
||||
// at compile-time and compile it in as a character string. We can replace
|
||||
// a few key constants (address of JIT-ted function and its size) by looking
|
||||
// for a few magic numbers and doing a dumb string replacement.
|
||||
//
|
||||
// Unfortunately this approach is showing its limits; we can only define one
|
||||
// symbol, and this approach only works with GDB. The .so approach above is
|
||||
// more reliable.
|
||||
|
||||
#ifndef __APPLE__ |
||||
const unsigned char upb_jit_debug_elf_file[] = { |
||||
#include "upb/pb/jit_debug_elf_file.h" |
||||
}; |
||||
|
||||
// Values stored in gdb_jit_descriptor.action_flag.
typedef enum {
  GDB_JIT_NOACTION = 0,
  GDB_JIT_REGISTER = 1,
  GDB_JIT_UNREGISTER = 2
} jit_actions_t;
||||
|
||||
// One node in the doubly-linked list of symbol files hanging off
// __jit_debug_descriptor.  The member order must not change: the layout is
// copied from gdb/jit.h.
typedef struct gdb_jit_entry {
  struct gdb_jit_entry *next_entry;
  struct gdb_jit_entry *prev_entry;
  const char *symfile_addr;  // Start of the in-memory symbol file.
  uint64_t symfile_size;     // Size of that symbol file in bytes.
} gdb_jit_entry;
||||
|
||||
typedef struct { |
||||
uint32_t version; |
||||
uint32_t action_flag; |
||||
gdb_jit_entry *relevant_entry; |
||||
gdb_jit_entry *first_entry; |
||||
} gdb_jit_descriptor; |
||||
|
||||
gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL}; |
||||
|
||||
// Hook of the GDB JIT interface; upb_reg_jit_gdb() calls it after updating
// __jit_debug_descriptor.  The body is intentionally empty: the function is
// kept out of line, and the empty asm statement presumably keeps the compiler
// from discarding the call.
void __attribute__((noinline)) __jit_debug_register_code(void) {
  __asm__ __volatile__("");
}
||||
|
||||
// Registers the JIT-ted code of jc->plan with GDB.
//
// Copies the pre-built ELF image into plan->debug_info, patches the code
// address and size into it, and links a new entry into
// __jit_debug_descriptor.  The caller must have stored plan->jit_code and
// plan->jit_size before calling, since both are written into the image.
//
// Registration is a debugging aid only, so an allocation failure simply
// skips it instead of crashing.
static void upb_reg_jit_gdb(jitcompiler *jc) {
  upb_pbdecoderplan *plan = jc->plan;
  const size_t elf_len = sizeof(upb_jit_debug_elf_file);

  // Create debug info.
  plan->debug_info = malloc(elf_len);
  if (!plan->debug_info) return;
  memcpy(plan->debug_info, upb_jit_debug_elf_file, elf_len);

  // Scan the image one 64-bit word at a time and replace the magic
  // placeholders.  Words are read and written with memcpy() so the byte
  // buffer is never accessed through a uint64_t pointer, and an offset is
  // used instead of arithmetic on void pointers.
  const uint64_t code_addr = (uintptr_t)plan->jit_code;
  const uint64_t code_size = plan->jit_size;
  for (size_t ofs = 0; ofs + sizeof(uint64_t) <= elf_len;
       ofs += sizeof(uint64_t)) {
    uint64_t word;
    memcpy(&word, plan->debug_info + ofs, sizeof(word));
    if (word == 0x12345678) {
      memcpy(plan->debug_info + ofs, &code_addr, sizeof(code_addr));
    } else if (word == 0x321) {
      // "else if": a word that was just patched is never re-examined.
      memcpy(plan->debug_info + ofs, &code_size, sizeof(code_size));
    }
  }

  // Register the JIT-ted code with GDB by pushing a new entry onto the front
  // of the descriptor's list.
  gdb_jit_entry *e = malloc(sizeof(*e));
  if (!e) return;  // debug_info stays owned by the plan and is freed with it.
  e->next_entry = __jit_debug_descriptor.first_entry;
  e->prev_entry = NULL;
  if (e->next_entry) e->next_entry->prev_entry = e;
  e->symfile_addr = plan->debug_info;
  e->symfile_size = elf_len;
  __jit_debug_descriptor.first_entry = e;
  __jit_debug_descriptor.relevant_entry = e;
  __jit_debug_descriptor.action_flag = GDB_JIT_REGISTER;
  __jit_debug_register_code();
}
||||
|
||||
#else |
||||
|
||||
// On __APPLE__ builds there is no GDB registration; this stub only silences
// the unused-parameter warning.
static void upb_reg_jit_gdb(jitcompiler *jc) {
  (void)jc;
}
||||
|
||||
#endif |
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,242 @@ |
||||
|
||||
#ifndef UPB_DECODER_INT_H_ |
||||
#define UPB_DECODER_INT_H_ |
||||
|
||||
#include <stdlib.h> |
||||
#include "upb/def.h" |
||||
#include "upb/handlers.h" |
||||
#include "upb/sink.h" |
||||
#include "upb/pb/decoder.h" |
||||
|
||||
// Opcode definitions. The canonical meaning of each opcode is its
|
||||
// implementation in the interpreter (the JIT is written to match this).
|
||||
//
|
||||
// All instructions have the opcode in the low byte.
|
||||
// Instruction format for most instructions is:
|
||||
//
|
||||
// +-------------------+--------+
|
||||
// | arg (24) | op (8) |
|
||||
// +-------------------+--------+
|
||||
//
|
||||
// Exceptions are indicated below. A few opcodes are multi-word.
|
||||
typedef enum { |
||||
// Opcodes 1-8, 13, 15-18 parse their respective descriptor types.
|
||||
// Arg for all of these is the upb selector for this field.
|
||||
#define T(type) OP_PARSE_ ## type = UPB_DESCRIPTOR_TYPE_ ## type |
||||
T(DOUBLE), T(FLOAT), T(INT64), T(UINT64), T(INT32), T(FIXED64), T(FIXED32), |
||||
T(BOOL), T(UINT32), T(SFIXED32), T(SFIXED64), T(SINT32), T(SINT64), |
||||
#undef T |
||||
OP_STARTMSG = 9, // No arg.
|
||||
OP_ENDMSG = 10, // No arg.
|
||||
OP_STARTSEQ = 11, |
||||
OP_ENDSEQ = 12, |
||||
OP_STARTSUBMSG = 14, |
||||
OP_ENDSUBMSG = 19, |
||||
OP_STARTSTR = 20, |
||||
OP_STRING = 21, |
||||
OP_ENDSTR = 22, |
||||
|
||||
OP_PUSHTAGDELIM = 23, // No arg.
|
||||
OP_PUSHLENDELIM = 24, // No arg.
|
||||
OP_POP = 25, // No arg.
|
||||
OP_SETDELIM = 26, // No arg.
|
||||
OP_SETGROUPNUM = 27, |
||||
OP_SETBIGGROUPNUM = 28, // two words: | unused (24) | opc || groupnum (32) |
|
||||
|
||||
// The arg for these opcodes is a local label reference.
|
||||
OP_CHECKDELIM = 29, |
||||
OP_CALL = 30, |
||||
OP_BRANCH = 31, |
||||
|
||||
// Different opcodes depending on how many bytes expected.
|
||||
OP_TAG1 = 32, // | expected tag (16) | jump target (8) | opc (8) |
|
||||
OP_TAG2 = 33, // | expected tag (16) | jump target (8) | opc (8) |
|
||||
OP_TAGN = 34, // three words:
|
||||
// | unused (16) | jump target(8) | opc (8) |
|
||||
// | expected tag 1 (32) |
|
||||
// | expected tag 2 (32) |
|
||||
|
||||
OP_SETDISPATCH = 35, // N words:
|
||||
// | unused (24) | opc |
|
||||
// | upb_inttable* (32 or 64) |
|
||||
|
||||
OP_HALT = 36, // No arg.
|
||||
} opcode; |
||||
|
||||
#define OP_MAX OP_HALT |
||||
|
||||
// Extracts the opcode from an instruction word; the opcode occupies the low
// byte of every instruction.
UPB_INLINE opcode getop(uint32_t instr) {
  return (opcode)(instr & 0xff);
}
||||
|
||||
// Frame type for the decoder.  Declared "extern" so that including this
// header only declares the object instead of emitting a tentative definition
// in every translation unit; the object itself (with its initializer) must be
// defined in exactly one .c file.
extern const upb_frametype upb_pbdecoder_frametype;
||||
|
||||
// Decoder entry points; used as handlers.
|
||||
void *upb_pbdecoder_start(void *closure, const void *handler_data, |
||||
size_t size_hint); |
||||
size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, |
||||
size_t size); |
||||
bool upb_pbdecoder_end(void *closure, const void *handler_data); |
||||
|
||||
// Decoder-internal functions that the JIT calls to handle fallback paths.
|
||||
void *upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, |
||||
size_t size); |
||||
size_t upb_pbdecoder_suspend(upb_pbdecoder *d); |
||||
int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, uint32_t fieldnum, |
||||
uint8_t wire_type); |
||||
int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, uint64_t expected); |
||||
int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, uint64_t *u64); |
||||
int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32); |
||||
int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64); |
||||
void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg); |
||||
|
||||
// Error messages that are shared between the bytecode and JIT decoders.
|
||||
extern const char *kPbDecoderStackOverflow; |
||||
|
||||
typedef struct _upb_pbdecoderplan upb_pbdecoderplan; |
||||
|
||||
// Access to decoderplan members needed by the decoder.
|
||||
bool upb_pbdecoderplan_hasjitcode(const upb_pbdecoderplan *p); |
||||
uint32_t *upb_pbdecoderplan_codebase(const upb_pbdecoderplan *p); |
||||
const char *upb_pbdecoder_getopname(unsigned int op); |
||||
upb_string_handler *upb_pbdecoderplan_jitcode(const upb_pbdecoderplan *p); |
||||
|
||||
// JIT entry point.
|
||||
void upb_pbdecoder_jit(upb_pbdecoderplan *plan); |
||||
void upb_pbdecoder_freejit(upb_pbdecoderplan *plan); |
||||
|
||||
|
||||
// A special label that means "do field dispatch for this message and branch to
|
||||
// wherever that takes you."
|
||||
#define LABEL_DISPATCH 0 |
||||
|
||||
#define DECODE_OK -1 |
||||
#define DECODE_MISMATCH -2 // Used only from checktag_slow().
|
||||
#define DECODE_ENDGROUP -2 // Used only from checkunknown().
|
||||
|
||||
typedef struct { |
||||
// The absolute stream offset of the end-of-frame delimiter.
|
||||
// Non-delimited frames (groups and non-packed repeated fields) reuse the
|
||||
// delimiter of their parent, even though the frame may not end there.
|
||||
//
|
||||
// NOTE: the JIT stores a slightly different value here for non-top frames.
|
||||
// It stores the value relative to the end of the enclosed message. But the
|
||||
// innermost frame is still stored the same way, which is important for
|
||||
// ensuring that calls from the JIT into C work correctly.
|
||||
uint64_t end_ofs; |
||||
uint32_t *base; |
||||
uint32_t groupnum; |
||||
union { |
||||
upb_inttable *dispatch; // Not used by the JIT.
|
||||
void *closure; // Only used by the JIT.
|
||||
} u; |
||||
} upb_pbdecoder_frame; |
||||
|
||||
struct upb_pbdecoder { |
||||
// Where we push parsed data (not owned).
|
||||
upb_sink *sink; |
||||
|
||||
size_t call_len; |
||||
uint32_t *pc, *last; |
||||
|
||||
// Current input buffer and its stream offset.
|
||||
const char *buf, *ptr, *end, *checkpoint; |
||||
|
||||
// End of the delimited region, relative to ptr, or NULL if not in this buf.
|
||||
const char *delim_end; |
||||
|
||||
// End of the delimited region, relative to ptr, or end if not in this buf.
|
||||
const char *data_end; |
||||
|
||||
// Overall stream offset of "buf."
|
||||
uint64_t bufstart_ofs; |
||||
|
||||
// How many bytes past the end of the user buffer we want to skip.
|
||||
size_t skip; |
||||
|
||||
// Buffer for residual bytes not parsed from the previous buffer.
|
||||
// The maximum number of residual bytes we require is 12; a five-byte
|
||||
// unknown tag plus an eight-byte value, less one because the value
|
||||
// is only a partial value.
|
||||
char residual[12]; |
||||
char *residual_end; |
||||
|
||||
// Stores the user buffer passed to our decode function.
|
||||
const char *buf_param; |
||||
size_t size_param; |
||||
|
||||
#ifdef UPB_USE_JIT_X64 |
||||
// Used momentarily by the generated code to store a value while a user
|
||||
// function is called.
|
||||
uint32_t tmp_len; |
||||
|
||||
const void *saved_rsp; |
||||
#endif |
||||
|
||||
upb_status *status; |
||||
|
||||
// Our internal stack.
|
||||
upb_pbdecoder_frame *top, *limit; |
||||
upb_pbdecoder_frame stack[UPB_DECODER_MAX_NESTING]; |
||||
uint32_t *callstack[UPB_DECODER_MAX_NESTING * 2]; |
||||
}; |
||||
|
||||
// Data pertaining to a single decoding method/function.
|
||||
// Each method contains code to parse a single message type.
|
||||
// It may or may not be bound to a destination handlers object.
|
||||
typedef struct { |
||||
// While compiling, the base is relative in "ofs", after compiling it is
|
||||
// absolute in "ptr".
|
||||
union { |
||||
uint32_t ofs; // PC offset of method.
|
||||
const void *ptr; // Pointer to bytecode or machine code for this method.
|
||||
} base; |
||||
|
||||
// Whether this method is native code or bytecode.
|
||||
bool native_code; |
||||
|
||||
// The message type that this method is parsing.
|
||||
const upb_msgdef *msg; |
||||
|
||||
// The destination handlers this method is bound to, or NULL if this method
|
||||
// can be bound to a destination handlers instance at runtime.
|
||||
//
|
||||
// If non-NULL, we own a ref.
|
||||
const upb_handlers *dest_handlers; |
||||
|
||||
// The dispatch table layout is:
|
||||
// [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ]
|
||||
//
|
||||
// If wt1 matches, jump to the 48-bit offset. If wt2 matches, lookup
|
||||
// (UPB_MAX_FIELDNUMBER + fieldnum) and jump there.
|
||||
//
|
||||
// We need two wire types because of packed/non-packed compatibility. A
|
||||
// primitive repeated field can use either wire type and be valid. While we
|
||||
// could key the table on fieldnum+wiretype, the table would be 8x sparser.
|
||||
//
|
||||
// Storing two wire types in the primary value allows us to quickly rule out
|
||||
// the second wire type without needing to do a separate lookup (this case is
|
||||
// less common than an unknown field).
|
||||
upb_inttable dispatch; |
||||
} upb_pbdecodermethod; |
||||
|
||||
struct _upb_pbdecoderplan { |
||||
// Pointer to bytecode.
|
||||
uint32_t *code, *code_end; |
||||
|
||||
// Maps upb_msgdef*/upb_handlers* -> upb_pbdecodermethod
|
||||
upb_inttable methods; |
||||
|
||||
// The method that starts parsing when we first call into the plan.
|
||||
// Ideally we will remove the idea that any of the methods in the plan
|
||||
// are special like this, so that any method can be the top-level one.
|
||||
upb_pbdecodermethod *topmethod; |
||||
|
||||
#ifdef UPB_USE_JIT_X64 |
||||
// JIT-generated machine code (else NULL).
|
||||
upb_string_handler *jit_code; |
||||
size_t jit_size; |
||||
char *debug_info; |
||||
void *dl; |
||||
#endif |
||||
}; |
||||
|
||||
#endif // UPB_DECODER_INT_H_
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue