Protocol Buffers - Google's data interchange format (grpc依赖) https://developers.google.com/protocol-buffers/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1086 lines
34 KiB

|//
|// upb - a minimalist implementation of protocol buffers.
|//
|// Copyright (c) 2011 Google Inc. See LICENSE for details.
|// Author: Josh Haberman <jhaberman@gmail.com>
|//
|// JIT compiler for upb_pbdecoder on x86. Given a decoderplan object (which
|// contains an embedded set of upb_handlers), generates code specialized to
|// parsing the specific message and calling specific handlers.
|//
|// Since the JIT can call other functions (the JIT'ted code is not a leaf
|// function) we must respect alignment rules. All x86-64 systems require
|// 16-byte stack alignment.
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include "dynasm/dasm_x86.h"
#include "upb/shim/shim.h"
#ifndef MAP_ANONYMOUS
# define MAP_ANONYMOUS MAP_ANON
#endif
// We map into the low 32 bits when we can, but if this is not available
// (like on OS X) we take what we can get. It's not required for correctness,
// it's just a performance thing that makes it more likely that our jumps
// can be rel32 (i.e. within 32-bits of our pc) instead of the longer
// sequence required for other jumps (see callp).
#ifndef MAP_32BIT
#define MAP_32BIT 0
#endif
// Per-message PC-label slots. upb_getpclabel() adds one of these offsets to
// the base label number stored for a upb_handlers object, giving the jump
// target for that part of the message's generated code.
enum {
STARTMSG = 0,  // Message entry that first calls the startmsg handler (if any).
AFTER_STARTMSG = 1,  // Message entry that skips the startmsg handler.
ENDOFBUF = 2,  // Target of checkpoint when PTR reaches DECODER->effective_end.
ENDOFMSG = 3,  // End of message: calls the endmsg handler (if any) and returns.
DYNDISPATCH = 4,  // Table-based field dispatch code.
TOTAL_MSG_PCLABELS = 5,  // Number of label slots reserved per message.
};
// Per-field PC-label slots, used as offsets from the base label number stored
// for a upb_fielddef.
enum {
FIELD = 0,  // Field entry that first checks the wire type (dispatch-table target).
FIELD_NO_TYPECHECK = 1,  // Field entry used when the full tag was already matched.
TOTAL_FIELD_PCLABELS = 2,  // Number of label slots reserved per field.
};
// Per-message JIT bookkeeping; one instance is stored in plan->msginfo for
// each upb_handlers object reachable from the plan.
typedef struct {
// Largest field number covered by tablearray (capped when pclabels are
// assigned).
uint32_t max_field_number;
// Dispatch table of code addresses, indexed by field number.
// Currently keyed on field number. Could also try keying it
// on encoded or decoded tag, or on encoded field number.
void **tablearray;
// Pointer to the JIT code for parsing this message.
void *jit_func;
} upb_jitmsginfo;
// Returns the PC-label number for slot `n` of `obj` (a upb_handlers or
// upb_fielddef pointer previously registered in plan->pclabels). The object
// must have been registered; this is asserted.
static uint32_t upb_getpclabel(decoderplan *plan, const void *obj, int n) {
  upb_value base;
  bool present = upb_inttable_lookupptr(&plan->pclabels, obj, &base);
  UPB_ASSERT_VAR(present, present);
  uint32_t first_label = upb_value_getuint32(base);
  return first_label + n;
}
// Returns the upb_jitmsginfo registered in plan->msginfo for handlers `h`.
// The handlers must have been registered; this is asserted.
static upb_jitmsginfo *upb_getmsginfo(const decoderplan *plan,
                                      const upb_handlers *h) {
  upb_value entry;
  bool present = upb_inttable_lookupptr(&plan->msginfo, h, &entry);
  UPB_ASSERT_VAR(present, present);
  return upb_value_getptr(entry);
}
// To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code
// at runtime. GDB 7.x+ has defined an interface for doing this, and these
// structure/function definitions are copied out of gdb/jit.h
//
// We need to give GDB an ELF file at runtime describing the symbols we have
// generated. To avoid implementing the ELF format, we generate an ELF file
// at compile-time and compile it in as a character string. We can replace
// a few key constants (address of JIT-ted function and its size) by looking
// for a few magic numbers and doing a dumb string replacement.
#ifndef __APPLE__
// Template ELF image, generated at compile time and included as raw bytes.
// upb_reg_jit_gdb() copies it and patches the placeholder constants with the
// address and size of the JIT-ted code.
const unsigned char upb_jit_debug_elf_file[] = {
#include "upb/pb/jit_debug_elf_file.h"
};
// Values stored in gdb_jit_descriptor.action_flag to tell the debugger what
// happened to the entry named by relevant_entry.
typedef enum
{
GDB_JIT_NOACTION = 0,
GDB_JIT_REGISTER,
GDB_JIT_UNREGISTER
} jit_actions_t;
// One registered symbol file: a node in the doubly-linked list that hangs off
// gdb_jit_descriptor.first_entry.
typedef struct gdb_jit_entry {
struct gdb_jit_entry *next_entry;
struct gdb_jit_entry *prev_entry;
// In-memory ELF image describing the JIT-ted code, and its size in bytes.
const char *symfile_addr;
uint64_t symfile_size;
} gdb_jit_entry;
// Root object of the GDB JIT interface. The layout is copied from gdb/jit.h
// and must not be changed.
typedef struct {
uint32_t version;
// One of the jit_actions_t values.
uint32_t action_flag;
// Entry that action_flag refers to.
gdb_jit_entry *relevant_entry;
// Head of the list of all registered entries.
gdb_jit_entry *first_entry;
} gdb_jit_descriptor;
// The process-wide descriptor: version 1, no pending action, empty entry list.
// NOTE(review): the symbol name is presumably what GDB looks up, so it must
// stay exactly as is -- confirm against the GDB JIT interface documentation.
gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL};
// Deliberately empty hook, called after __jit_debug_descriptor is updated.
// It is kept out of line (noinline) and contains an empty volatile asm
// statement so that the call is not optimized away.
// NOTE(review): presumably GDB sets a breakpoint on this symbol to notice new
// registrations -- confirm against the GDB JIT interface documentation.
void __attribute__((noinline)) __jit_debug_register_code() {
__asm__ __volatile__("");
}
// Registers the JIT-ted code of `plan` with GDB via the JIT debug interface.
//
// A private copy of the template ELF image is stored in plan->debug_info and
// its placeholder constants are patched with the real code address and size.
// A new entry is then linked at the head of the descriptor's entry list and
// the registration hook is invoked.
//
// Registration is best-effort debugging support: if either allocation fails
// the function returns without registering anything (a successfully allocated
// plan->debug_info stays owned by the plan).
void upb_reg_jit_gdb(decoderplan *plan) {
  // Create debug info.
  size_t elf_len = sizeof(upb_jit_debug_elf_file);
  plan->debug_info = malloc(elf_len);
  if (!plan->debug_info) return;
  memcpy(plan->debug_info, upb_jit_debug_elf_file, elf_len);

  // Scan the image one 64-bit word at a time (a trailing partial word is
  // ignored) and replace the magic placeholders. The branches are mutually
  // exclusive so that a freshly patched word is never patched a second time.
  uint64_t *p = (void*)plan->debug_info;
  uint64_t *limit = p + elf_len / sizeof(*p);
  for (; p < limit; ++p) {
    if (*p == 0x12345678) {
      *p = (uintptr_t)plan->jit_code;
    } else if (*p == 0x321) {
      *p = plan->jit_size;
    }
  }

  // Register the JIT-ted code with GDB.
  gdb_jit_entry *e = malloc(sizeof(*e));
  if (!e) return;
  e->next_entry = __jit_debug_descriptor.first_entry;
  e->prev_entry = NULL;
  if (e->next_entry) e->next_entry->prev_entry = e;
  e->symfile_addr = plan->debug_info;
  e->symfile_size = elf_len;
  __jit_debug_descriptor.first_entry = e;
  __jit_debug_descriptor.relevant_entry = e;
  __jit_debug_descriptor.action_flag = GDB_JIT_REGISTER;
  __jit_debug_register_code();
}
#else
// GDB registration is compiled out on Apple platforms; this stub only keeps
// the call site in upb_decoderplan_makejit() unconditional.
void upb_reg_jit_gdb(decoderplan *plan)
{
  (void)plan;  // Intentionally unused.
}
#endif
// Asserts that `addr` is non-NULL. This lives in its own function because the
// check is applied to expressions such as &foo, for which GCC would warn that
// an inline (&foo != NULL) comparison can never be false.
static void upb_assert_notnull(void *addr) {
  (void)addr;  // Keeps the parameter "used" when assert() compiles to nothing.
  assert(addr != NULL);
}
// DynASM configuration: target architecture, plus the names of the generated
// action list (passed to dasm_setup) and of the global-label enum prefix and
// name table (UPB_JIT_GLOBAL_* is used when the dispatch tables are built).
|.arch x64
|.actionlist upb_jit_actionlist
|.globals UPB_JIT_GLOBAL_
|.globalnames upb_jit_globalnames
|
|// Calling conventions: symbolic names for the argument registers used when
|// calling C functions. This will need to be changed for Windows, which uses a different calling convention!
|.define ARG1_64, rdi
|.define ARG2_8, r6b // DynASM's equivalent to "sil" -- low byte of esi.
|.define ARG2_32, esi
|.define ARG2_64, rsi
|.define ARG3_32, edx
|.define ARG3_64, rdx
|.define ARG4_32, ecx
|.define ARG4_64, rcx
|.define XMMARG1, xmm0 // First floating-point argument register.
|
|// Register allocation / type map.
|// ALL of the code in this file uses these register allocations.
|// When we "call" within this file, we do not use regular calling
|// conventions, but of course when calling to user callbacks we must.
|.define PTR, rbx // Writing this to DECODER->ptr commits our progress.
|.define CLOSURE, r12 // Passed as the first argument to every handler call.
|.type SINKFRAME, upb_sinkframe, r13 // Top frame of the sink's frame stack.
|.type FRAME, frame, r14 // Top frame of the decoder's frame stack.
|.type DECODER, upb_pbdecoder, r15 // The decoder object itself.
|.type SINK, upb_sink // No fixed register; used as SINK:reg-> for field access.
|
// callp addr: emits a call to the C function `addr`, which is asserted to be
// non-NULL at code-generation time. A direct call is emitted when the absolute
// address is below 0xffffffff; otherwise the address is loaded into rax
// (clobbering it) and called indirectly.
|.macro callp, addr
|| upb_assert_notnull(addr);
|// TODO(haberman): fix this. I believe the predicate we should actually be
|// testing is whether the jump distance is greater than INT32_MAX, not the
|// absolute address of the target.
|| if ((uintptr_t)addr < 0xffffffff) {
| call &addr
|| } else {
| mov64 rax, (uintptr_t)addr
| call rax
|| }
|.endmacro
|
// loadarg2 val: loads the pointer-sized constant `val` into the second
// argument register, picking the encoding at code-generation time: mov64 for
// values above 0xffffffff, a 32-bit mov for other non-zero values, and xor for
// zero.
|.macro loadarg2, val
||{
|| uintptr_t data = (uintptr_t)val;
|| if (data > 0xffffffff) {
| mov64 ARG2_64, data
|| } else if (data) {
| mov ARG2_32, data
|| } else {
| xor ARG2_32, ARG2_32
|| }
|| }
|.endmacro
|
// load_handler_data h, f, type: loads into ARG2 the handler data registered in
// handlers `h` for field `f` and handler type `type`.
|.macro load_handler_data, h, f, type
| loadarg2 gethandlerdata(h, f, type)
|.endmacro
|
|// checkpoint h: commits our progress by writing PTR to DECODER->ptr, then
|// jumps to h's ENDOFBUF label if PTR has reached DECODER->effective_end.
|.macro checkpoint, h
| mov DECODER->ptr, PTR
| cmp PTR, DECODER->effective_end
| jae =>upb_getpclabel(plan, h, ENDOFBUF)
|.endmacro
|
// check_bool_ret: leaves the JIT (jumps to exit_jit) if the bool just returned
// in al by a handler is false.
|.macro check_bool_ret
| test al, al
| jz ->exit_jit
|.endmacro
|
// check_ptr_ret: leaves the JIT (jumps to exit_jit) if the pointer just
// returned in rax is NULL.
|.macro check_ptr_ret
| test rax, rax
| jz ->exit_jit
|.endmacro
|
|// Decodes a varint whose first 4 bytes are already loaded, leaving it in ARG2.
|// Inputs:
|// - ecx: first 4 bytes of varint
|// - offset: offset from PTR where varint begins
|// Outputs:
|// - ARG2: contains decoded varint (edx, and rdi on the slow path, are clobbered)
|// - rax: new PTR (address just past the varint); PTR itself is not modified
|.macro decode_loaded_varint, offset
| // Check for <=2 bytes inline, otherwise jump to 2-10 byte decoder.
| lea rax, [PTR + offset + 1] // Assume a 1-byte varint.
| mov ARG2_32, ecx
| and ARG2_32, 0x7f
| test cl, cl
| jns >9 // High bit of byte 0 clear: done.
| lea rax, [PTR + offset + 2] // Assume a 2-byte varint.
| movzx edx, ch
| and edx, 0x7f
| shl edx, 7
| or ARG2_32, edx
| test cx, cx
| jns >9 // High bit of byte 1 clear: done.
| mov ARG1_64, rax // Slow path: pass the address after the first two bytes.
|// XXX: I don't think this handles 64-bit values correctly.
|// Test with UINT64_MAX
| callp upb_vdecode_max8_fast
|// rax return from function will contain new pointer
| mov ARG2_64, rdx // The decoded value is taken from rdx.
| check_ptr_ret // Check for unterminated, >10-byte varint.
|9:
|.endmacro
|
// decode_varint offset: loads and decodes the varint at [PTR + offset] into
// ARG2 and advances PTR past it. Clobbers ecx and rax in addition to whatever
// decode_loaded_varint clobbers.
|.macro decode_varint, offset
| mov ecx, dword [PTR + offset]
| decode_loaded_varint offset
| mov PTR, rax
|.endmacro
|
|// Table-based field dispatch.
|// Inputs:
|// - ecx: first 4 bytes of tag
|// Outputs (at the jump into the field's code; PTR still points at the tag):
|// - ecx: field number
|// - esi: wire type
|// Could specialize this by avoiding the value masking: could just key the
|// table on the raw (length-masked) varint to save 3-4 cycles of latency.
|// Currently only support tables where all entries are in the array part.
|.macro dyndispatch_, h
|| asmlabel(plan, "_UPB_MCODE_DISPATCH_%s.%d",
|| upb_msgdef_fullname(upb_handlers_msgdef(h)), rand());
|=>upb_getpclabel(plan, h, DYNDISPATCH):
| decode_loaded_varint, 0
| mov ecx, esi
| shr ecx, 3 // ecx = field number.
| and esi, 0x7 // Note: this value is used in the FIELD pclabel below.
| cmp esi, UPB_WIRE_TYPE_END_GROUP
| je >1
|| upb_jitmsginfo *mi = upb_getmsginfo(plan, h);
| cmp ecx, mi->max_field_number // Bounds-check the field.
| ja ->exit_jit // In the future; could be unknown label
|| if ((uintptr_t)mi->tablearray < 0xffffffff) {
| // TODO: support hybrid array/hash tables.
| mov rax, qword [rcx*8 + mi->tablearray]
|| } else {
| mov64 rax, (uintptr_t)mi->tablearray
| mov rax, qword [rax + rcx*8]
|| }
| jmp rax // Dispatch: unpredictable jump.
|1:
|// End group.
| cmp ecx, FRAME->group_fieldnum
| jne ->exit_jit // Unexpected END_GROUP tag.
| mov PTR, rax // rax came from decode_loaded_varint
| mov DECODER->ptr, PTR
| jmp =>upb_getpclabel(plan, h, ENDOFMSG)
|.endmacro
|
// Selects how `dyndispatch h` is implemented: either a full copy of the
// dispatch code at every use (currently enabled) or a jump to the single copy
// emitted at the message's DYNDISPATCH label.
|.if 1
| // Replicated dispatch: larger code, but better branch prediction.
| .define dyndispatch, dyndispatch_
|.else
| // Single dispatch: smaller code, could be faster because of reduced
| // icache usage. We keep this around to allow for easy comparison between
| // the two.
| .macro dyndispatch, h
| jmp =>upb_getpclabel(plan, h, DYNDISPATCH)
| .endmacro
|.endif
|
// pushsinkframe handlers, field, endtype: stores the selector for
// (field, endtype) in the current sink frame, then pushes a new sink frame
// holding `handlers` and the current CLOSURE and publishes it as the sink's
// top. Leaves the JIT if the sink's frame stack is full.
// Clobbers rax, rcx and r9.
|.macro pushsinkframe, handlers, field, endtype
| mov rax, DECODER->sink
| mov dword SINKFRAME->selector, getselector(field, endtype)
| lea rcx, [SINKFRAME + sizeof(upb_sinkframe)] // rcx for short addressing
| cmp rcx, SINK:rax->limit
| jae ->exit_jit // Frame stack overflow.
| mov64 r9, (uintptr_t)handlers
| mov SINKFRAME:rcx->h, r9
| mov SINKFRAME:rcx->closure, CLOSURE
| mov SINK:rax->top, rcx
| mov SINKFRAME, rcx
|.endmacro
|
// popsinkframe: pops the top sink frame, publishes the new top to the sink and
// reloads CLOSURE from the frame that is now on top. Clobbers rax.
|.macro popsinkframe
| sub SINKFRAME, sizeof(upb_sinkframe)
| mov rax, DECODER->sink
| mov SINK:rax->top, SINKFRAME
| mov CLOSURE, SINKFRAME->closure
|.endmacro
|
|// Push a stack frame (not the CPU stack, the upb_pbdecoder stack) plus a matching sink frame. Clobbers rax, r10 and pushsinkframe's registers.
|.macro pushframe, handlers, field, end_offset_, endtype
|// Decoder Frame.
| lea rax, [FRAME + sizeof(frame)] // rax for short addressing
| cmp rax, DECODER->limit
| jae ->exit_jit // Frame stack overflow.
| mov64 r10, (uintptr_t)field
| mov FRAME:rax->f, r10
| mov qword FRAME:rax->end_ofs, end_offset_
| mov byte FRAME:rax->is_sequence, (endtype == UPB_HANDLER_ENDSEQ)
| mov byte FRAME:rax->is_packed, 0
|| if (upb_fielddef_istagdelim(field) && endtype == UPB_HANDLER_ENDSUBMSG) {
| mov dword FRAME:rax->group_fieldnum, upb_fielddef_number(field) // Group: remember which END_GROUP tag closes it.
|| } else {
| mov dword FRAME:rax->group_fieldnum, 0xffffffff // Not a group frame.
|| }
| mov DECODER->top, rax
| mov FRAME, rax
| pushsinkframe handlers, field, endtype
|.endmacro
|
// popframe: pops the top decoder frame and the top sink frame, then recomputes
// DECODER->effective_end for the frame that is now on top.
|.macro popframe
| sub FRAME, sizeof(frame)
| mov DECODER->top, FRAME
| popsinkframe
| setmsgend
|.endmacro
|
// setmsgend: sets DECODER->effective_end to the smaller of DECODER->jit_end
// and the buffer address that corresponds to the current frame's end offset.
// Clobbers rsi and rax.
|.macro setmsgend
| mov rsi, DECODER->jit_end
| mov rax, qword FRAME->end_ofs // Will be UINT64_MAX for groups.
| sub rax, qword DECODER->bufstart_ofs
| add rax, qword DECODER->buf // rax = d->buf + f->end_ofs - d->bufstart_ofs
| jc >8 // If the addition overflowed, use jit_end
| cmp rax, rsi
| ja >8 // If jit_end is less, use jit_end
| mov rsi, rax // Use frame end.
|8:
| mov DECODER->effective_end, rsi
|.endmacro
|
|// checktag tag: compares the tag bytes loaded in rcx against the encoded tag
|// constant `tag`, setting the flags for a following je/jne. Since the tag is
|// a varint we must only compare as many bytes as actually have data, so the
|// comparison width is chosen from upb_value_size(tag) at code-generation
|// time. The 3- and 5-byte cases mask rcx in place; the 5-byte case also
|// clobbers rdx. Tags longer than 5 bytes are not supported (abort).
|.macro checktag, tag
|| switch (upb_value_size(tag)) {
|| case 1:
| cmp cl, tag
|| break;
|| case 2:
| cmp cx, tag
|| break;
|| case 3:
| and ecx, 0xffffff // 3 bytes
| cmp rcx, tag
|| break; // Previously fell through and emitted a redundant second cmp.
|| case 4:
| cmp ecx, tag
|| break;
|| case 5:
| mov64 rdx, 0xffffffffff // 5 bytes
| and rcx, rdx
| cmp rcx, tag
|| break;
|| default: abort();
|| }
|.endmacro
|
// sethas reg, hasbit: when `hasbit` is non-negative, sets bit number `hasbit`
// in the bit array that starts at the address held in `reg`. Emits nothing for
// a negative `hasbit`.
|.macro sethas, reg, hasbit
|| if (hasbit >= 0) {
| or byte [reg + ((uint32_t)hasbit / 8)], (1 << ((uint32_t)hasbit % 8))
|| }
|.endmacro
#include <stdlib.h>
#include "upb/pb/varint.h"
// Returns the handler function registered in `h` for field `f` and handler
// type `type`. Callers treat a NULL result as "no handler registered".
static upb_func *gethandler(const upb_handlers *h, const upb_fielddef *f,
                            upb_handlertype_t type) {
  upb_selector_t sel = getselector(f, type);
  return upb_handlers_gethandler(h, sel);
}
// Returns, as an integer, the handler data registered in `h` for field `f` and
// handler type `type`, ready to be emitted as an immediate operand.
static uintptr_t gethandlerdata(const upb_handlers *h, const upb_fielddef *f,
                                upb_handlertype_t type) {
  upb_selector_t sel = getselector(f, type);
  return (uintptr_t)upb_handlers_gethandlerdata(h, sel);
}
// Emits a named label at the current position in the generated code.
//
// The printf-style name is formatted by upb_vrprintf() into `str` (presumably
// heap-allocated there; upb_decoderplan_makejit() later free()s these
// strings). A fresh pclabel number is reserved, DynASM's pclabel table is
// grown to fit it, the label is emitted, and the name is recorded in
// plan->asmlabels keyed by the pclabel number.
static void asmlabel(decoderplan *plan, const char *fmt, ...) {
va_list ap;
va_start(ap, fmt);
char *str = NULL;
size_t size = 0;
upb_vrprintf(&str, &size, 0, fmt, ap);
va_end(ap);
// Reserve the next pclabel number and make room for it before emitting.
uint32_t label = plan->pclabel_count++;
dasm_growpc(plan, plan->pclabel_count);
|=>label:
upb_inttable_insert(&plan->asmlabels, label, upb_value_ptr(str));
}
// Emits code that decodes the value of field `f`, whose tag occupies
// `tag_size` bytes at PTR, and advances PTR past the tag and the value.
//
// Integer values are left in ARG2 and float/double values in XMMARG1, for
// upb_decoderplan_jit_callcb() to deliver. String/bytes fields are delivered
// right here by calling their start/string/end handlers. For length-delimited
// submessages only the length prefix is decoded (into ARG2); for groups only
// the tag is skipped.
static void upb_decoderplan_jit_decodefield(decoderplan *plan,
size_t tag_size,
const upb_handlers *h,
const upb_fielddef *f) {
// Decode the value; upb_decoderplan_jit_callcb() later moves it into the
// argument register expected by the callback.
asmlabel(plan, "UPB_MCODE_DECODE_FIELD_%s.%s",
upb_msgdef_fullname(upb_handlers_msgdef(h)),
upb_fielddef_name(f));
switch (upb_fielddef_descriptortype(f)) {
case UPB_DESCRIPTOR_TYPE_DOUBLE:
// Fixed 8-byte value, loaded straight into the FP argument register.
| movsd XMMARG1, qword [PTR + tag_size]
| add PTR, 8 + tag_size
break;
case UPB_DESCRIPTOR_TYPE_FIXED64:
case UPB_DESCRIPTOR_TYPE_SFIXED64:
| mov ARG2_64, qword [PTR + tag_size]
| add PTR, 8 + tag_size
break;
case UPB_DESCRIPTOR_TYPE_FLOAT:
// Fixed 4-byte value, loaded straight into the FP argument register.
| movss XMMARG1, dword [PTR + tag_size]
| add PTR, 4 + tag_size
break;
case UPB_DESCRIPTOR_TYPE_FIXED32:
case UPB_DESCRIPTOR_TYPE_SFIXED32:
| mov ARG2_32, dword [PTR + tag_size]
| add PTR, 4 + tag_size
break;
case UPB_DESCRIPTOR_TYPE_BOOL:
// Can't assume it's one byte long, because bool must be wire-compatible
// with all of the varint integer types.
// Normalize any non-zero varint to 1.
| decode_varint tag_size
| test ARG2_64, ARG2_64
| setne al
| movzx ARG2_32, al
break;
case UPB_DESCRIPTOR_TYPE_INT64:
case UPB_DESCRIPTOR_TYPE_UINT64:
case UPB_DESCRIPTOR_TYPE_INT32:
case UPB_DESCRIPTOR_TYPE_UINT32:
case UPB_DESCRIPTOR_TYPE_ENUM:
| decode_varint tag_size
break;
case UPB_DESCRIPTOR_TYPE_SINT64:
// 64-bit zig-zag decoding.
// value = (n >> 1) ^ -(n & 1)
| decode_varint tag_size
| mov rax, ARG2_64
| shr ARG2_64, 1
| and rax, 1
| neg rax
| xor ARG2_64, rax
break;
case UPB_DESCRIPTOR_TYPE_SINT32:
// 32-bit zig-zag decoding.
// value = (n >> 1) ^ -(n & 1)
| decode_varint tag_size
| mov eax, ARG2_32
| shr ARG2_32, 1
| and eax, 1
| neg eax
| xor ARG2_32, eax
break;
case UPB_DESCRIPTOR_TYPE_STRING:
case UPB_DESCRIPTOR_TYPE_BYTES: {
// We only handle the case where the entire string is in our current
// buf, which sidesteps any security problems. The C path has more
// robust checks.
// Decode the length into ARG2; rax is the address of the string data.
| mov ecx, dword [PTR + tag_size]
| decode_loaded_varint tag_size
| mov rdi, DECODER->end
| sub rdi, rax
| cmp ARG2_64, rdi // if (len > d->end - str)
| ja ->exit_jit // Can't deliver, whole string not in buf.
| mov PTR, rax
upb_func *handler = gethandler(h, f, UPB_HANDLER_STARTSTR);
if (handler) {
// void* startstr(void *c, const void *hd, size_t hint)
// The length is saved in DECODER->tmp_len across the call and reloaded
// into ARG4 for the string handler.
| mov DECODER->tmp_len, ARG2_32
| mov ARG1_64, CLOSURE
| mov ARG3_64, ARG2_64
| load_handler_data h, f, UPB_HANDLER_STARTSTR
| callp handler
| check_ptr_ret
| mov ARG1_64, rax // sub-closure
| mov ARG4_32, DECODER->tmp_len
} else {
// No startstr handler: the string handler receives the current closure.
| mov ARG1_64, CLOSURE
| mov ARG4_64, ARG2_64
}
handler = gethandler(h, f, UPB_HANDLER_STRING);
if (handler) {
// size_t str(void *c, const void *hd, const char *buf, size_t len)
| load_handler_data h, f, UPB_HANDLER_STRING
| mov ARG3_64, PTR
| callp handler
// TODO: properly handle returns other than "n" (the whole string).
| add PTR, rax
} else {
// No string handler: just skip over the string bytes.
| add PTR, ARG4_64
}
handler = gethandler(h, f, UPB_HANDLER_ENDSTR);
if (handler) {
// bool endstr(const upb_sinkframe *frame);
// NOTE(review): the signature comment above looks stale -- the call below
// passes (CLOSURE, handler data), like the other handlers in this file.
| mov ARG1_64, CLOSURE
| load_handler_data h, f, UPB_HANDLER_ENDSTR
| callp handler
| check_bool_ret
}
break;
}
// Will dispatch callbacks and call submessage in a second.
case UPB_DESCRIPTOR_TYPE_MESSAGE:
// Only the length prefix is decoded here (into ARG2).
| decode_varint tag_size
break;
case UPB_DESCRIPTOR_TYPE_GROUP:
// Groups have no length prefix: just step over the START_GROUP tag.
| add PTR, tag_size
break;
default: abort();
}
}
// Emits code that delivers the value just decoded for field `f`:
//  - submessage/group: calls the startsubmsg handler (if any), then either
//    pushes a frame and calls the sub-message's parser, or (when there are no
//    sub-handlers) skips the payload, and finally calls the endsubmsg handler
//    (if any);
//  - non-string scalar: stores the value directly into the closure when shim
//    data is available, otherwise calls the value handler (if any).
// String fields emit nothing here; they are delivered by
// upb_decoderplan_jit_decodefield().
static void upb_decoderplan_jit_callcb(decoderplan *plan,
const upb_handlers *h,
const upb_fielddef *f) {
// Call callbacks. Specializing the append accessors didn't yield a speed
// increase in benchmarks.
asmlabel(plan, "UPB_MCODE_CALLCB_%s.%s",
upb_msgdef_fullname(upb_handlers_msgdef(h)),
upb_fielddef_name(f));
if (upb_fielddef_issubmsg(f)) {
// Call startsubmsg handler (if any).
upb_func *startsubmsg = gethandler(h, f, UPB_HANDLER_STARTSUBMSG);
if (startsubmsg) {
// upb_sflow_t startsubmsg(const upb_sinkframe *frame)
// NOTE(review): the signature comment above looks stale -- the call below
// passes (CLOSURE, handler data) and treats the result as a pointer that
// becomes the new CLOSURE.
// ARG2 (the delimited length) is saved in tmp_len because the call
// sequence overwrites it.
| mov DECODER->tmp_len, ARG2_32
| mov ARG1_64, CLOSURE
| load_handler_data h, f, UPB_HANDLER_STARTSUBMSG
| callp startsubmsg
| check_ptr_ret
| mov CLOSURE, rax
}
// NOTE(review): when there is no startsubmsg handler, tmp_len is not written
// above, yet the length-delimited paths below read it -- confirm that it is
// set elsewhere in that case.
const upb_handlers *sub_h = upb_handlers_getsubhandlers(h, f);
if (sub_h) {
// Compute the end offset of the new frame in rdx.
if (upb_fielddef_istagdelim(f)) {
| mov rdx, UPB_NONDELIMITED
} else {
| mov esi, DECODER->tmp_len
| mov rdx, PTR
| sub rdx, DECODER->buf
| add rdx, DECODER->bufstart_ofs
| add rdx, rsi // = d->bufstart_ofs + (d->ptr - d->buf) + delim_len
}
| pushframe sub_h, f, rdx, UPB_HANDLER_ENDSUBMSG
| call =>upb_getpclabel(plan, sub_h, STARTMSG)
| popframe
} else {
if (upb_fielddef_istagdelim(f)) {
// Groups with no handlers not supported yet.
assert(false);
} else {
// No sub-handlers: skip the whole length-delimited payload.
| mov esi, DECODER->tmp_len
| add PTR, rsi
}
}
// Call endsubmsg handler (if any).
upb_func *endsubmsg = gethandler(h, f, UPB_HANDLER_ENDSUBMSG);
if (endsubmsg) {
// upb_flow_t endsubmsg(void *closure, upb_value fval);
| mov ARG1_64, CLOSURE
| load_handler_data h, f, UPB_HANDLER_ENDSUBMSG
| callp endsubmsg
| check_bool_ret
}
} else if (!upb_fielddef_isstring(f)) {
upb_handlertype_t handlertype = upb_handlers_getprimitivehandlertype(f);
upb_selector_t sel = getselector(f, handlertype);
upb_func *handler = gethandler(h, f, handlertype);
const upb_shim_data *data = upb_shim_getdata(h, sel);
if (data) {
// Shim fast path: store the value at CLOSURE + data->offset without
// calling the handler, then set the field's has-bit (if it has one).
switch (upb_fielddef_type(f)) {
case UPB_TYPE_INT64:
case UPB_TYPE_UINT64:
| mov [CLOSURE + data->offset], ARG2_64
break;
case UPB_TYPE_INT32:
case UPB_TYPE_UINT32:
case UPB_TYPE_ENUM:
| mov [CLOSURE + data->offset], ARG2_32
break;
case UPB_TYPE_DOUBLE:
| movsd qword [CLOSURE + data->offset], XMMARG1
break;
case UPB_TYPE_FLOAT:
| movss dword [CLOSURE + data->offset], XMMARG1
break;
case UPB_TYPE_BOOL:
| mov [CLOSURE + data->offset], ARG2_8
break;
case UPB_TYPE_STRING:
case UPB_TYPE_BYTES:
case UPB_TYPE_MESSAGE:
// Unreachable: this branch only handles non-string, non-submessage fields.
assert(false); break;
}
| sethas CLOSURE, data->hasbit
} else if (handler) {
// bool value(const upb_sinkframe* frame, ctype val)
// NOTE(review): the signature comment above looks stale -- the call below
// passes (CLOSURE, handler data, value). Integer values are moved from
// ARG2 to ARG3 before ARG2 is overwritten with the handler data;
// float/double values are already in XMMARG1.
| mov ARG1_64, CLOSURE
| mov ARG3_64, ARG2_64
| load_handler_data h, f, handlertype
| callp handler
| check_bool_ret
}
}
}
// Returns the varint-encoded tag for field `f`: the field number shifted left
// by three bits, combined with the native wire type of the field's descriptor
// type, as encoded by upb_vencode32(). The result is asserted to fit in five
// bytes.
static uint64_t upb_get_encoded_tag(const upb_fielddef *f) {
  uint32_t wire_type =
      upb_decoder_types[upb_fielddef_descriptortype(f)].native_wire_type;
  uint32_t tag = (upb_fielddef_number(f) << 3) | wire_type;
  uint64_t encoded = upb_vencode32(tag);
  // No tag should be greater than 5 bytes.
  assert(encoded <= 0xffffffffff);
  return encoded;
}
// Emits the code that ends a repeated-field sequence for field `f`: pops the
// sequence frame and calls the endseq handler (if any) with the restored
// closure and the handler data. The handler's return value is not checked.
static void upb_decoderplan_jit_endseq(decoderplan *plan,
const upb_handlers *h,
const upb_fielddef *f) {
| popframe
upb_func *endseq = gethandler(h, f, UPB_HANDLER_ENDSEQ);
if (endseq) {
| mov ARG1_64, CLOSURE
| load_handler_data h, f, UPB_HANDLER_ENDSEQ
| callp endseq
}
}
// Emits the complete parsing code for one field `f` of the message handled by
// `h`. `next_f` is the field with the next-higher field number (or NULL); its
// tag is checked first after this field, to skip dynamic dispatch when fields
// arrive in field-number order.
//
// PTR should point to the beginning of the tag.
static void upb_decoderplan_jit_field(decoderplan *plan,
const upb_handlers *h,
const upb_fielddef *f,
const upb_fielddef *next_f) {
asmlabel(plan, "UPB_MCODE_FIELD_%s.%s",
upb_msgdef_fullname(upb_handlers_msgdef(h)),
upb_fielddef_name(f));
uint64_t tag = upb_get_encoded_tag(f);
uint64_t next_tag = next_f ? upb_get_encoded_tag(next_f) : 0;
int tag_size = upb_value_size(tag);
// PC-label for the dispatch table.
// We check the wire type (which must be loaded in esi) because the
// table is keyed on field number, not type.
|=>upb_getpclabel(plan, f, FIELD):
| cmp esi, (tag & 0x7)
| jne ->exit_jit // In the future: could be an unknown field or packed.
|=>upb_getpclabel(plan, f, FIELD_NO_TYPECHECK):
if (upb_fielddef_isseq(f)) {
// Repeated field: call startseq (if any) and push a sequence frame that
// inherits the enclosing frame's end offset.
upb_func *startseq = gethandler(h, f, UPB_HANDLER_STARTSEQ);
if (startseq) {
| mov ARG1_64, CLOSURE
| load_handler_data h, f, UPB_HANDLER_STARTSEQ
| callp startseq
| check_ptr_ret
| mov CLOSURE, rax
}
| mov rsi, FRAME->end_ofs
| pushframe h, f, rsi, UPB_HANDLER_ENDSEQ
}
|1: // Label for repeating this field.
upb_decoderplan_jit_decodefield(plan, tag_size, h, f);
upb_decoderplan_jit_callcb(plan, h, f);
// This is kind of gross; future redesign should take into account how to
// make this work nicely. The difficult part is that the sequence can be
// broken either by end-of-message or by seeing a different field; in both
// cases we need to call the endseq handler, but what we do after that
// depends on which case triggered the end-of-sequence.
// Commit progress, leave the JIT at jit_end, and continue at label 2 if
// there is more data in this message.
| mov DECODER->ptr, PTR
| cmp PTR, DECODER->jit_end
| jae ->exit_jit
| cmp PTR, DECODER->effective_end
| jb >2
// End of message reached: close any open sequence first.
if (upb_fielddef_isseq(f)) {
upb_decoderplan_jit_endseq(plan, h, f);
}
| jmp =>upb_getpclabel(plan, h, ENDOFMSG)
|2:
// Load the next tag (8 bytes are read; checktag compares only what it needs).
| mov rcx, qword [PTR]
if (upb_fielddef_isseq(f)) {
// Same tag again: another element of this sequence.
| checktag tag
| je <1
upb_decoderplan_jit_endseq(plan, h, f);
// Load next tag again (popframe/endseq clobbered it).
| mov rcx, qword [PTR]
}
if (next_tag != 0) {
// Fast path: the next expected field follows directly.
| checktag next_tag
| je =>upb_getpclabel(plan, next_f, FIELD_NO_TYPECHECK)
}
// Fall back to dynamic dispatch.
| dyndispatch h
}
// qsort() comparator for uint32_t values, ascending order.
//
// Returns a negative, zero or positive value when *a is less than, equal to
// or greater than *b. The result is built from two comparisons rather than a
// subtraction: an unsigned difference converted to int has the wrong sign
// whenever the operands differ by more than INT_MAX.
static int upb_compare_uint32(const void *a, const void *b) {
  uint32_t lhs = *(const uint32_t*)a;
  uint32_t rhs = *(const uint32_t*)b;
  return (lhs > rhs) - (lhs < rhs);
}
// Emits all code for parsing one message type (handlers `h`): the two entry
// points (with and without the startmsg handler call), the initial dispatch,
// the code for every field in ascending field-number order, and the
// end-of-buffer / end-of-message epilogue.
static void upb_decoderplan_jit_msg(decoderplan *plan,
const upb_handlers *h) {
asmlabel(plan, "UPB_MCODE_DECODEMSG_%s",
upb_msgdef_fullname(upb_handlers_msgdef(h)));
// Entry point that skips the startmsg handler.
|=>upb_getpclabel(plan, h, AFTER_STARTMSG):
| push rbp
| mov rbp, rsp
| jmp >1
// Entry point used for submessages: calls the startmsg handler first.
|=>upb_getpclabel(plan, h, STARTMSG):
| push rbp
| mov rbp, rsp
// Call startmsg handler (if any):
upb_func *startmsg = upb_handlers_gethandler(h, UPB_STARTMSG_SELECTOR);
if (startmsg) {
// upb_flow_t startmsg(void *closure, const void *hd);
| mov ARG1_64, CLOSURE
| loadarg2 upb_handlers_gethandlerdata(h, UPB_STARTMSG_SELECTOR)
| callp startmsg
| check_bool_ret
}
|1:
// Establish the effective end for this frame, check for end-of-buffer, then
// load the first tag and dispatch on it.
| setmsgend
| checkpoint h
| mov ecx, dword [PTR]
| dyndispatch_ h
// --------- New code section (does not fall through) ------------------------
// Emit code for parsing each field (dynamic dispatch contains pointers to
// all of these).
// Create an ordering over the fields in field number order.
// Parsing will theoretically be fastest if we emit code in the same
// order as field numbers are seen on-the-wire because of an optimization
// in the generated code that skips dynamic dispatch if the next field is
// as expected.
const upb_msgdef *md = upb_handlers_msgdef(h);
int num_keys = upb_msgdef_numfields(md);
// NOTE(review): the malloc() result is not checked before it is written to.
uint32_t *keys = malloc(num_keys * sizeof(*keys));
int idx = 0;
upb_msg_iter i;
for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) {
keys[idx++] = upb_fielddef_number(upb_msg_iter_field(&i));
}
qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32);
// NOTE(review): this loop's `int i` shadows the upb_msg_iter `i` above.
for(int i = 0; i < num_keys; i++) {
const upb_fielddef *f = upb_msgdef_itof(md, keys[i]);
const upb_fielddef *next_f =
(i + 1 < num_keys) ? upb_msgdef_itof(md, keys[i + 1]) : NULL;
upb_decoderplan_jit_field(plan, h, f, next_f);
}
free(keys);
// --------- New code section (does not fall through) ------------------------
// End-of-buf / end-of-message.
// We hit a buffer limit; either we hit jit_end or end-of-submessage.
|=>upb_getpclabel(plan, h, ENDOFBUF):
| cmp PTR, DECODER->jit_end
| jae ->exit_jit
|=>upb_getpclabel(plan, h, ENDOFMSG):
// We are at end-of-submsg: call endmsg handler (if any):
upb_func *endmsg = upb_handlers_gethandler(h, UPB_ENDMSG_SELECTOR);
if (endmsg) {
// void endmsg(void *closure, const void *hd, upb_status *status) {
// The status argument is the address of the status_ member of the sink's
// pipeline.
| mov ARG1_64, CLOSURE
| loadarg2 upb_handlers_gethandlerdata(h, UPB_ENDMSG_SELECTOR)
| mov ARG3_64, DECODER->sink
| mov ARG3_64, SINK:ARG3_64->pipeline_
| add ARG3_64, offsetof(upb_pipeline, status_)
| callp endmsg
}
// Undo the push rbp / mov rbp, rsp of the entry point and return.
| leave
| ret
}
// Emits all machine code for `plan`: first the entry/exit trampoline, then the
// parsing code for every message type registered in plan->msginfo.
//
// The trampoline is entered as a C function taking (decoder, code address).
// It saves the callee-save registers, loads the fixed register assignments
// from the decoder and calls the given address. exit_jit restores the
// registers and returns to the C caller; it is also jumped to from anywhere
// inside the generated code, which is why the stack pointer is recomputed
// from the saved rbp.
static void upb_decoderplan_jit(decoderplan *plan) {
// The JIT prologue/epilogue trampoline that is generated in this function
// does not depend on the handlers, so it will never vary. Ideally we would
// put it in an object file and just link it into upb so we could have only a
// single copy of it instead of one copy for each decoderplan. But our
// options for doing that are undesirable: GCC inline assembly is
// complicated, not portable to other compilers, and comes with subtle
// caveats about incorrect things what the optimizer might do if you eg.
// execute non-local jumps. Putting this code in a .s file would force us to
// calculate the structure offsets ourself instead of symbolically
// (ie. [r15 + 0xcd] instead of DECODER->ptr). So we tolerate a bit of
// unnecessary duplication/redundancy.
asmlabel(plan, "upb_jit_trampoline");
| push rbp
| mov rbp, rsp
| push r15
| push r14
| push r13
| push r12
| push rbx
// Align stack.
| sub rsp, 8
// Load the fixed register assignments from the decoder (first argument).
| mov DECODER, ARG1_64
| mov DECODER->saved_rbp, rbp
| mov FRAME, DECODER:ARG1_64->top
| mov rax, DECODER:ARG1_64->sink
| mov SINKFRAME, SINK:rax->top
| mov CLOSURE, SINKFRAME->closure
| mov PTR, DECODER->ptr
// TODO: push return addresses for re-entry (will be necessary for multiple
// buffer support).
// Enter the generated code at the address passed as the second argument.
| call ARG2_64
asmlabel(plan, "exitjit");
|->exit_jit:
// Recover the trampoline's stack position from the saved frame pointer:
// five pushed registers (40 bytes) plus 8 bytes of alignment lie below rbp.
| mov rbp, DECODER->saved_rbp
| lea rsp, [rbp - 48]
// Counter previous alignment.
| add rsp, 8
| pop rbx
| pop r12
| pop r13
| pop r14
| pop r15
| leave
| ret
// Emit the parsing code for every registered message type.
upb_inttable_iter i;
upb_inttable_begin(&i, &plan->msginfo);
for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
const upb_handlers *h = (const upb_handlers*)upb_inttable_iter_key(&i);
upb_decoderplan_jit_msg(plan, h);
}
}
// Reserves pclabel numbers for handlers `h`, for each of its fields, and
// (recursively, depth-first) for every sub-message handlers object reachable
// from it. Also creates the upb_jitmsginfo for `h`, including an uninitialized
// dispatch table sized for the (capped) maximum field number.
static void upb_decoderplan_jit_assignpclabels(decoderplan *plan,
                                               const upb_handlers *h) {
  // Limit the DFS: handlers that already have labels were visited before.
  if (upb_inttable_lookupptr(&plan->pclabels, h, NULL)) {
    return;
  }

  // Message-level labels.
  upb_inttable_insertptr(&plan->pclabels, h,
                         upb_value_uint32(plan->pclabel_count));
  plan->pclabel_count += TOTAL_MSG_PCLABELS;

  upb_jitmsginfo *msg = malloc(sizeof(upb_jitmsginfo));
  msg->max_field_number = 0;
  upb_inttable_insertptr(&plan->msginfo, h, upb_value_ptr(msg));

  upb_msg_iter it;
  for (upb_msg_begin(&it, upb_handlers_msgdef(h));
       !upb_msg_done(&it);
       upb_msg_next(&it)) {
    const upb_fielddef *field = upb_msg_iter_field(&it);
    msg->max_field_number =
        UPB_MAX(msg->max_field_number, upb_fielddef_number(field));

    // Field-level labels.
    upb_inttable_insertptr(&plan->pclabels, field,
                           upb_value_uint32(plan->pclabel_count));
    plan->pclabel_count += TOTAL_FIELD_PCLABELS;

    // Discover the whole graph of handlers depth-first. We will probably
    // revise this later to be more explicit about the list of handlers that
    // the plan should include.
    if (!upb_fielddef_issubmsg(field)) {
      continue;
    }
    const upb_handlers *subh = upb_handlers_getsubhandlers(h, field);
    if (subh) {
      upb_decoderplan_jit_assignpclabels(plan, subh);
    }
  }

  // TODO: support large field numbers by either using a hash table or
  // generating code for a binary search. For now large field numbers
  // will just fall back to the table decoder.
  msg->max_field_number = UPB_MIN(msg->max_field_number, 16000);
  msg->tablearray = malloc((msg->max_field_number + 1) * sizeof(void*));
}
// Builds the JIT code for `plan`.
//
// Steps: assign pclabels to every message and field, generate and link the
// code with DynASM, map memory for it, register it with GDB, encode the code
// into the mapping, fill in the per-message dispatch tables, and finally make
// the mapping executable. In debug builds the code is also dumped as an
// assembler file of .byte directives to /tmp/upb-jit-code.s.
//
// If the code memory cannot be mapped, or cannot be made executable,
// plan->jit_code is left NULL. upb_decoder_enterjit() checks jit_code before
// entering the JIT, so decoding then simply never uses it.
static void upb_decoderplan_makejit(decoderplan *plan) {
  upb_inttable_init(&plan->msginfo, UPB_CTYPE_PTR);
  plan->debug_info = NULL;

  // Assign pclabels.
  plan->pclabel_count = 0;
  upb_inttable_init(&plan->pclabels, UPB_CTYPE_UINT32);
  upb_decoderplan_jit_assignpclabels(plan, plan->dest_handlers);

  upb_inttable_init(&plan->asmlabels, UPB_CTYPE_PTR);
  void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals));
  dasm_init(plan, 1);
  dasm_setupglobal(plan, globals, UPB_JIT_GLOBAL__MAX);
  dasm_growpc(plan, plan->pclabel_count);
  dasm_setup(plan, upb_jit_actionlist);

  upb_decoderplan_jit(plan);

  int dasm_status = dasm_link(plan, &plan->jit_size);
  (void)dasm_status;
  assert(dasm_status == DASM_S_OK);

  upb_inttable_iter i;

  // Anonymous mappings take fd -1; some implementations require it.
  plan->jit_code = mmap(NULL, plan->jit_size, PROT_READ | PROT_WRITE,
                        MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
  if ((void*)plan->jit_code == MAP_FAILED) {
    // No code memory: disable the JIT for this plan.
    plan->jit_code = NULL;
    upb_inttable_uninit(&plan->pclabels);
  } else {
    upb_reg_jit_gdb(plan);

    dasm_encode(plan, plan->jit_code);

    // Create dispatch tables.
    upb_inttable_begin(&i, &plan->msginfo);
    for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
      const upb_handlers *h = (const upb_handlers*)upb_inttable_iter_key(&i);
      upb_jitmsginfo *mi = upb_getmsginfo(plan, h);
      // We jump to after the startmsg handler since it is called before
      // entering the JIT (either by upb_pbdecoder or by a previous call to the
      // JIT).
      mi->jit_func = plan->jit_code +
          dasm_getpclabel(plan, upb_getpclabel(plan, h, AFTER_STARTMSG));
      for (uint32_t j = 0; j <= mi->max_field_number; j++) {
        const upb_fielddef *f = upb_msgdef_itof(upb_handlers_msgdef(h), j);
        if (f) {
          mi->tablearray[j] = plan->jit_code +
              dasm_getpclabel(plan, upb_getpclabel(plan, f, FIELD));
        } else {
          // TODO: extend the JIT to handle unknown fields.
          // For the moment we exit the JIT for any unknown field.
          mi->tablearray[j] = globals[UPB_JIT_GLOBAL_exit_jit];
        }
      }
    }

    upb_inttable_uninit(&plan->pclabels);

    // Switch the mapping from writable to executable. On failure the code is
    // unmapped below, after the (read-only) debug dump.
    bool exec_ok =
        mprotect(plan->jit_code, plan->jit_size, PROT_EXEC | PROT_READ) == 0;

#ifndef NDEBUG
    // Dump to a .o file in /tmp, for easy inspection.

    // Convert all asm labels from pclabel offsets to machine code offsets.
    upb_inttable mclabels;
    upb_inttable_init(&mclabels, UPB_CTYPE_PTR);
    upb_inttable_begin(&i, &plan->asmlabels);
    for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
      upb_inttable_insert(
          &mclabels,
          dasm_getpclabel(plan, upb_inttable_iter_key(&i)),
          upb_inttable_iter_value(&i));
    }

    FILE *dump = fopen("/tmp/upb-jit-code.s", "w");
    if (dump) {
      fputs(" .text", dump);
      size_t linelen = 0;
      for (size_t ofs = 0; ofs < plan->jit_size; ofs++) {
        upb_value v;
        if (upb_inttable_lookup(&mclabels, ofs, &v)) {
          const char *label = upb_value_getptr(v);
          fprintf(dump, "\n\n_%s:\n", label);
          fprintf(dump, " .globl _%s", label);
          linelen = 1000;  // Force a fresh .byte line after a label.
        }
        if (linelen >= 77) {
          linelen = fprintf(dump, "\n .byte %u", plan->jit_code[ofs]);
        } else {
          linelen += fprintf(dump, ",%u", plan->jit_code[ofs]);
        }
      }
      fputs("\n", dump);
      fclose(dump);
    } else {
      fprintf(stderr, "Couldn't open /tmp/upb-jit-code.s for writing/\n");
    }
    upb_inttable_uninit(&mclabels);
#endif

    if (!exec_ok) {
      // The code can never be run: release it and disable the JIT.
      munmap(plan->jit_code, plan->jit_size);
      plan->jit_code = NULL;
    }
  }

  // Free the label names collected by asmlabel().
  upb_inttable_begin(&i, &plan->asmlabels);
  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    free(upb_value_getptr(upb_inttable_iter_value(&i)));
  }
  upb_inttable_uninit(&plan->asmlabels);

  dasm_free(plan);
  free(globals);
}
// Releases everything upb_decoderplan_makejit() created for `plan`: each
// message's dispatch table and upb_jitmsginfo, the msginfo table itself, the
// mapped code memory and the GDB debug image.
static void upb_decoderplan_freejit(decoderplan *plan) {
  upb_inttable_iter it;
  for (upb_inttable_begin(&it, &plan->msginfo);
       !upb_inttable_done(&it);
       upb_inttable_next(&it)) {
    upb_jitmsginfo *info = upb_value_getptr(upb_inttable_iter_value(&it));
    free(info->tablearray);
    free(info);
  }
  upb_inttable_uninit(&plan->msginfo);

  munmap(plan->jit_code, plan->jit_size);
  free(plan->debug_info);
  // TODO: unregister
}
// Runs the JIT-compiled decoder of `plan` on decoder `d`, when possible.
//
// The JIT is only entered if code exists, both the decoder's and the sink's
// frame stacks are at their base, and d->ptr is set and lies before
// d->jit_end. Otherwise this is a no-op and the caller's regular decoding path
// handles the data.
static void upb_decoder_enterjit(upb_pbdecoder *d, const decoderplan *plan) {
if (plan->jit_code &&
d->top == d->stack &&
d->sink->top == d->sink->stack &&
d->ptr && d->ptr < d->jit_end) {
#ifndef NDEBUG
// Debug builds only: pin sentinel values into the callee-save registers the
// JIT uses, so the asserts below can detect a register that was not restored.
register uint64_t rbx asm ("rbx") = 11;
register uint64_t r12 asm ("r12") = 12;
register uint64_t r13 asm ("r13") = 13;
register uint64_t r14 asm ("r14") = 14;
register uint64_t r15 asm ("r15") = 15;
#endif
// Decodes as many fields as possible, updating d->ptr appropriately,
// before falling through to the slow(er) path.
// The trampoline sits at the start of the code; it receives the decoder and
// the address of the top-level message's parsing code.
void (*upb_jit_decode)(upb_pbdecoder *d, void*) = (void*)plan->jit_code;
upb_jitmsginfo *mi = upb_getmsginfo(plan, plan->dest_handlers);
assert(mi);
upb_jit_decode(d, mi->jit_func);
assert(d->ptr <= d->end);
// Test that callee-save registers were properly restored.
// (These asserts compile to nothing under NDEBUG, where the variables above
// are not declared.)
assert(rbx == 11);
assert(r12 == 12);
assert(r13 == 13);
assert(r14 == 14);
assert(r15 == 15);
}
}