Protocol Buffers - Google's data interchange format (grpc依赖)
https://developers.google.com/protocol-buffers/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1086 lines
34 KiB
1086 lines
34 KiB
|// |
|
|// upb - a minimalist implementation of protocol buffers. |
|
|// |
|
|// Copyright (c) 2011 Google Inc. See LICENSE for details. |
|
|// Author: Josh Haberman <jhaberman@gmail.com> |
|
|// |
|
|// JIT compiler for upb_pbdecoder on x86. Given a decoderplan object (which |
|
|// contains an embedded set of upb_handlers), generates code specialized to |
|
|// parsing the specific message and calling specific handlers. |
|
|// |
|
|// Since the JIT can call other functions (the JIT'ted code is not a leaf |
|
|// function) we must respect alignment rules. All x86-64 systems require |
|
|// 16-byte stack alignment. |
|
|
|
#define _GNU_SOURCE |
|
#include <stdio.h> |
|
#include <sys/mman.h> |
|
#include "dynasm/dasm_x86.h" |
|
#include "upb/shim/shim.h" |
|
|
|
#ifndef MAP_ANONYMOUS |
|
# define MAP_ANONYMOUS MAP_ANON |
|
#endif |
|
|
|
// We map into the low 32 bits when we can, but if this is not available |
|
// (like on OS X) we take what we can get. It's not required for correctness, |
|
// it's just a performance thing that makes it more likely that our jumps |
|
// can be rel32 (i.e. within 32-bits of our pc) instead of the longer |
|
// sequence required for other jumps (see callp). |
|
#ifndef MAP_32BIT |
|
#define MAP_32BIT 0 |
|
#endif |
|
|
|
// These are used to track jump targets for messages and fields. |
|
// Per-message pclabel slots.  Each message's handlers object is assigned a
// base pclabel; upb_getpclabel(plan, h, SLOT) yields base + SLOT.
enum { |
|
// Message entry point that first calls the startmsg handler (if any).
STARTMSG = 0, |
|
// Message entry point that jumps past the startmsg handler call.
AFTER_STARTMSG = 1, |
|
// Target of the "checkpoint" macro when PTR has reached effective_end.
ENDOFBUF = 2, |
|
// Calls the endmsg handler (if any) and returns from the message code.
ENDOFMSG = 3, |
|
// Location of the table-based field dispatch code (see dyndispatch_).
DYNDISPATCH = 4, |
|
// Number of pclabels reserved per message.
TOTAL_MSG_PCLABELS = 5, |
|
}; |
|
|
|
// Per-field pclabel slots.  Each fielddef is assigned a base pclabel;
// upb_getpclabel(plan, f, SLOT) yields base + SLOT.
enum { |
|
// Dispatch-table target: checks the wire type in esi before parsing.
FIELD = 0, |
|
// Entry just past the wire type check; jumped to when the whole encoded tag
// has already been compared against this field's tag.
FIELD_NO_TYPECHECK = 1, |
|
// Number of pclabels reserved per field.
TOTAL_FIELD_PCLABELS = 2, |
|
}; |
|
|
|
typedef struct { |
|
uint32_t max_field_number; |
|
// Currently keyed on field number. Could also try keying it |
|
// on encoded or decoded tag, or on encoded field number. |
|
void **tablearray; |
|
// Pointer to the JIT code for parsing this message. |
|
void *jit_func; |
|
} upb_jitmsginfo; |
|
|
|
// Returns the pclabel number for slot |n| of |obj|, where |obj| is a pointer
// (handlers or fielddef) that was previously inserted into plan->pclabels.
// The lookup is expected to succeed; this is checked with UPB_ASSERT_VAR.
static uint32_t upb_getpclabel(decoderplan *plan, const void *obj, int n) {
  upb_value base;
  bool ok = upb_inttable_lookupptr(&plan->pclabels, obj, &base);
  UPB_ASSERT_VAR(ok, ok);
  // The table stores the first label of the object's reserved range.
  return upb_value_getuint32(base) + n;
}
|
|
|
static upb_jitmsginfo *upb_getmsginfo(const decoderplan *plan, |
|
const upb_handlers *h) { |
|
upb_value v; |
|
bool found = upb_inttable_lookupptr(&plan->msginfo, h, &v); |
|
UPB_ASSERT_VAR(found, found); |
|
return upb_value_getptr(v); |
|
} |
|
|
|
// To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code |
|
// at runtime. GDB 7.x+ has defined an interface for doing this, and these |
|
// structure/function definitions are copied out of gdb/jit.h |
|
// |
|
// We need to give GDB an ELF file at runtime describing the symbols we have |
|
// generated. To avoid implementing the ELF format, we generate an ELF file |
|
// at compile-time and compile it in as a character string. We can replace |
|
// a few key constants (address of JIT-ted function and its size) by looking |
|
// for a few magic numbers and doing a dumb string replacement. |
|
|
|
#ifndef __APPLE__ |
|
const unsigned char upb_jit_debug_elf_file[] = { |
|
#include "upb/pb/jit_debug_elf_file.h" |
|
}; |
|
|
|
typedef enum |
|
{ |
|
GDB_JIT_NOACTION = 0, |
|
GDB_JIT_REGISTER, |
|
GDB_JIT_UNREGISTER |
|
} jit_actions_t; |
|
|
|
typedef struct gdb_jit_entry { |
|
struct gdb_jit_entry *next_entry; |
|
struct gdb_jit_entry *prev_entry; |
|
const char *symfile_addr; |
|
uint64_t symfile_size; |
|
} gdb_jit_entry; |
|
|
|
typedef struct { |
|
uint32_t version; |
|
uint32_t action_flag; |
|
gdb_jit_entry *relevant_entry; |
|
gdb_jit_entry *first_entry; |
|
} gdb_jit_descriptor; |
|
|
|
gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL}; |
|
|
|
void __attribute__((noinline)) __jit_debug_register_code() { |
|
__asm__ __volatile__(""); |
|
} |
|
|
|
// Registers the JIT-ted code of |plan| with GDB.
//
// A private copy of the template ELF image (upb_jit_debug_elf_file) is stored
// in plan->debug_info.  Every 8-byte word of the copy that equals one of the
// two magic numbers is patched: 0x12345678 becomes the address of the JIT
// code and 0x321 becomes its size.  The copy is then linked at the head of
// GDB's JIT descriptor list and GDB is notified.
//
// If memory for the image or the list entry cannot be allocated, nothing is
// registered and plan->debug_info is left NULL.
void upb_reg_jit_gdb(decoderplan *plan) {
  // Create debug info.
  const size_t elf_len = sizeof(upb_jit_debug_elf_file);
  plan->debug_info = malloc(elf_len);
  if (!plan->debug_info) return;
  memcpy(plan->debug_info, upb_jit_debug_elf_file, elf_len);

  // Scan whole 8-byte words only.  The words are read and written with
  // memcpy() so the byte buffer is never accessed through a uint64_t*.
  char *image = (char*)plan->debug_info;
  for (size_t ofs = 0; ofs + sizeof(uint64_t) <= elf_len;
       ofs += sizeof(uint64_t)) {
    uint64_t word;
    memcpy(&word, image + ofs, sizeof(word));
    if (word == 0x12345678) {
      word = (uintptr_t)plan->jit_code;
    } else if (word == 0x321) {
      word = plan->jit_size;
    } else {
      continue;  // Not a placeholder; leave untouched.
    }
    memcpy(image + ofs, &word, sizeof(word));
  }

  // Register the JIT-ted code with GDB.
  gdb_jit_entry *e = malloc(sizeof(*e));
  if (!e) {
    free(plan->debug_info);
    plan->debug_info = NULL;
    return;
  }
  e->next_entry = __jit_debug_descriptor.first_entry;
  e->prev_entry = NULL;
  if (e->next_entry) e->next_entry->prev_entry = e;
  e->symfile_addr = plan->debug_info;
  e->symfile_size = elf_len;
  __jit_debug_descriptor.first_entry = e;
  __jit_debug_descriptor.relevant_entry = e;
  __jit_debug_descriptor.action_flag = GDB_JIT_REGISTER;
  __jit_debug_register_code();
}
|
|
|
#else |
|
|
|
void upb_reg_jit_gdb(decoderplan *plan) { |
|
(void)plan; |
|
} |
|
|
|
#endif |
|
|
|
// Has to be a separate function, otherwise GCC will complain about |
|
// expressions like (&foo != NULL) because they will never evaluate |
|
// to false. |
|
// Asserts that |addr| is non-NULL.  Kept as a real function (rather than an
// inline expression) so the check is not written directly as (&foo != NULL).
static void upb_assert_notnull(void *addr) {
  assert(addr != NULL);
  (void)addr;  // Keeps |addr| "used" when assert() compiles to nothing.
}
|
|
|
|.arch x64 |
|
|.actionlist upb_jit_actionlist |
|
|.globals UPB_JIT_GLOBAL_ |
|
|.globalnames upb_jit_globalnames |
|
| |
|
|// Calling conventions. Note -- this will need to be changed for |
|
|// Windows, which uses a different calling convention! |
|
|.define ARG1_64, rdi |
|
|.define ARG2_8, r6b // DynASM's equivalent to "sil" -- low byte of esi. |
|
|.define ARG2_32, esi |
|
|.define ARG2_64, rsi |
|
|.define ARG3_32, edx |
|
|.define ARG3_64, rdx |
|
|.define ARG4_32, ecx |
|
|.define ARG4_64, rcx |
|
|.define XMMARG1, xmm0 |
|
|
|
| |
|
|// Register allocation / type map. |
|
|// ALL of the code in this file uses these register allocations. |
|
|// When we "call" within this file, we do not use regular calling |
|
|// conventions, but of course when calling to user callbacks we must. |
|
|.define PTR, rbx // Writing this to DECODER->ptr commits our progress. |
|
|.define CLOSURE, r12 |
|
|.type SINKFRAME, upb_sinkframe, r13 |
|
|.type FRAME, frame, r14 |
|
|.type DECODER, upb_pbdecoder, r15 |
|
|.type SINK, upb_sink |
|
| |
|
|.macro callp, addr |
|
|| upb_assert_notnull(addr); |
|
|// TODO(haberman): fix this. I believe the predicate we should actually be |
|
|// testing is whether the jump distance is greater than INT32_MAX, not the |
|
|// absolute address of the target. |
|
|| if ((uintptr_t)addr < 0xffffffff) { |
|
| call &addr |
|
|| } else { |
|
| mov64 rax, (uintptr_t)addr |
|
| call rax |
|
|| } |
|
|.endmacro |
|
| |
|
|.macro loadarg2, val |
|
||{ |
|
|| uintptr_t data = (uintptr_t)val; |
|
|| if (data > 0xffffffff) { |
|
| mov64 ARG2_64, data |
|
|| } else if (data) { |
|
| mov ARG2_32, data |
|
|| } else { |
|
| xor ARG2_32, ARG2_32 |
|
|| } |
|
|| } |
|
|.endmacro |
|
| |
|
|.macro load_handler_data, h, f, type |
|
| loadarg2 gethandlerdata(h, f, type) |
|
|.endmacro |
|
| |
|
|// Checkpoints our progress by writing PTR to DECODER, and |
|
|// checks for end-of-buffer. |
|
|.macro checkpoint, h |
|
| mov DECODER->ptr, PTR |
|
| cmp PTR, DECODER->effective_end |
|
| jae =>upb_getpclabel(plan, h, ENDOFBUF) |
|
|.endmacro |
|
| |
|
|.macro check_bool_ret |
|
| test al, al |
|
| jz ->exit_jit |
|
|.endmacro |
|
| |
|
|.macro check_ptr_ret |
|
| test rax, rax |
|
| jz ->exit_jit |
|
|.endmacro |
|
| |
|
|// Decodes varint into ARG2. |
|
|// Inputs: |
|
|// - ecx: first 4 bytes of varint |
|
|// - offset: offset from PTR where varint begins |
|
|// Outputs: |
|
|// - ARG2: contains decoded varint |
|
|// - rax: new PTR |
|
|.macro decode_loaded_varint, offset |
|
| // Check for <=2 bytes inline, otherwise jump to 2-10 byte decoder. |
|
| lea rax, [PTR + offset + 1] |
|
| mov ARG2_32, ecx |
|
| and ARG2_32, 0x7f |
|
| test cl, cl |
|
| jns >9 |
|
| lea rax, [PTR + offset + 2] |
|
| movzx edx, ch |
|
| and edx, 0x7f |
|
| shl edx, 7 |
|
| or ARG2_32, edx |
|
| test cx, cx |
|
| jns >9 |
|
| mov ARG1_64, rax |
|
|// XXX: I don't think this handles 64-bit values correctly. |
|
|// Test with UINT64_MAX |
|
| callp upb_vdecode_max8_fast |
|
|// rax return from function will contain new pointer |
|
| mov ARG2_64, rdx |
|
| check_ptr_ret // Check for unterminated, >10-byte varint. |
|
|9: |
|
|.endmacro |
|
| |
|
|.macro decode_varint, offset |
|
| mov ecx, dword [PTR + offset] |
|
| decode_loaded_varint offset |
|
| mov PTR, rax |
|
|.endmacro |
|
| |
|
|// Table-based field dispatch. |
|
|// Inputs: |
|
|// - ecx: first 4 bytes of tag |
|
|// Outputs: |
|
|// - ecx: field number |
|
|// - esi: wire type |
|
|// Could specialize this by avoiding the value masking: could just key the |
|
|// table on the raw (length-masked) varint to save 3-4 cycles of latency. |
|
|// Currently only support tables where all entries are in the array part. |
|
|.macro dyndispatch_, h |
|
|| asmlabel(plan, "_UPB_MCODE_DISPATCH_%s.%d", |
|
|| upb_msgdef_fullname(upb_handlers_msgdef(h)), rand()); |
|
|=>upb_getpclabel(plan, h, DYNDISPATCH): |
|
| decode_loaded_varint, 0 |
|
| mov ecx, esi |
|
| shr ecx, 3 |
|
| and esi, 0x7 // Note: this value is used in the FIELD pclabel below. |
|
| cmp esi, UPB_WIRE_TYPE_END_GROUP |
|
| je >1 |
|
|| upb_jitmsginfo *mi = upb_getmsginfo(plan, h); |
|
| cmp ecx, mi->max_field_number // Bounds-check the field. |
|
| ja ->exit_jit // In the future; could be unknown label |
|
|| if ((uintptr_t)mi->tablearray < 0xffffffff) { |
|
| // TODO: support hybrid array/hash tables. |
|
| mov rax, qword [rcx*8 + mi->tablearray] |
|
|| } else { |
|
| mov64 rax, (uintptr_t)mi->tablearray |
|
| mov rax, qword [rax + rcx*8] |
|
|| } |
|
| jmp rax // Dispatch: unpredictable jump. |
|
|1: |
|
|// End group. |
|
| cmp ecx, FRAME->group_fieldnum |
|
| jne ->exit_jit // Unexpected END_GROUP tag. |
|
| mov PTR, rax // rax came from decode_loaded_varint |
|
| mov DECODER->ptr, PTR |
|
| jmp =>upb_getpclabel(plan, h, ENDOFMSG) |
|
|.endmacro |
|
| |
|
|.if 1 |
|
| // Replicated dispatch: larger code, but better branch prediction. |
|
| .define dyndispatch, dyndispatch_ |
|
|.else |
|
| // Single dispatch: smaller code, could be faster because of reduced |
|
| // icache usage. We keep this around to allow for easy comparison between |
|
| // the two. |
|
| .macro dyndispatch, h |
|
| jmp =>upb_getpclabel(plan, h, DYNDISPATCH) |
|
| .endmacro |
|
|.endif |
|
| |
|
|.macro pushsinkframe, handlers, field, endtype |
|
| mov rax, DECODER->sink |
|
| mov dword SINKFRAME->selector, getselector(field, endtype) |
|
| lea rcx, [SINKFRAME + sizeof(upb_sinkframe)] // rcx for short addressing |
|
| cmp rcx, SINK:rax->limit |
|
| jae ->exit_jit // Frame stack overflow. |
|
| mov64 r9, (uintptr_t)handlers |
|
| mov SINKFRAME:rcx->h, r9 |
|
| mov SINKFRAME:rcx->closure, CLOSURE |
|
| mov SINK:rax->top, rcx |
|
| mov SINKFRAME, rcx |
|
|.endmacro |
|
| |
|
|.macro popsinkframe |
|
| sub SINKFRAME, sizeof(upb_sinkframe) |
|
| mov rax, DECODER->sink |
|
| mov SINK:rax->top, SINKFRAME |
|
| mov CLOSURE, SINKFRAME->closure |
|
|.endmacro |
|
| |
|
|// Push a stack frame (not the CPU stack, the upb_pbdecoder stack). |
|
|.macro pushframe, handlers, field, end_offset_, endtype |
|
|// Decoder Frame. |
|
| lea rax, [FRAME + sizeof(frame)] // rax for short addressing |
|
| cmp rax, DECODER->limit |
|
| jae ->exit_jit // Frame stack overflow. |
|
| mov64 r10, (uintptr_t)field |
|
| mov FRAME:rax->f, r10 |
|
| mov qword FRAME:rax->end_ofs, end_offset_ |
|
| mov byte FRAME:rax->is_sequence, (endtype == UPB_HANDLER_ENDSEQ) |
|
| mov byte FRAME:rax->is_packed, 0 |
|
|| if (upb_fielddef_istagdelim(field) && endtype == UPB_HANDLER_ENDSUBMSG) { |
|
| mov dword FRAME:rax->group_fieldnum, upb_fielddef_number(field) |
|
|| } else { |
|
| mov dword FRAME:rax->group_fieldnum, 0xffffffff |
|
|| } |
|
| mov DECODER->top, rax |
|
| mov FRAME, rax |
|
| pushsinkframe handlers, field, endtype |
|
|.endmacro |
|
| |
|
|.macro popframe |
|
| sub FRAME, sizeof(frame) |
|
| mov DECODER->top, FRAME |
|
| popsinkframe |
|
| setmsgend |
|
|.endmacro |
|
| |
|
|.macro setmsgend |
|
| mov rsi, DECODER->jit_end |
|
| mov rax, qword FRAME->end_ofs // Will be UINT64_MAX for groups. |
|
| sub rax, qword DECODER->bufstart_ofs |
|
| add rax, qword DECODER->buf // rax = d->buf + f->end_ofs - d->bufstart_ofs |
|
| jc >8 // If the addition overflowed, use jit_end |
|
| cmp rax, rsi |
|
| ja >8 // If jit_end is less, use jit_end |
|
| mov rsi, rax // Use frame end. |
|
|8: |
|
| mov DECODER->effective_end, rsi |
|
|.endmacro |
|
| |
|
|// rcx contains the tag, compare it against "tag", but since it is a varint |
|
|// we must only compare as many bytes as actually have data. |
|
|// Emits a comparison of the tag bytes loaded in rcx against the encoded
|// varint "tag".  Only upb_value_size(tag) bytes take part in the comparison;
|// the flags are left set for a following je/jne.  For 3- and 5-byte tags the
|// unused high bytes of rcx are masked off first (rdx is clobbered for the
|// 5-byte mask).  Any other size aborts at code-generation time.
|.macro checktag, tag
||  switch (upb_value_size(tag)) {
||    case 1:
|       cmp   cl, tag
||      break;
||    case 2:
|       cmp   cx, tag
||      break;
||    case 3:
|       and   ecx, 0xffffff  // 3 bytes
|       cmp   rcx, tag
||      break;  // Do not fall through and emit the 4-byte compare as well.
||    case 4:
|       cmp   ecx, tag
||      break;
||    case 5:
|       mov64 rdx, 0xffffffffff  // 5 bytes
|       and   rcx, rdx
|       cmp   rcx, tag
||      break;
||    default: abort();
||  }
|.endmacro
|
| |
|
|.macro sethas, reg, hasbit |
|
|| if (hasbit >= 0) { |
|
| or byte [reg + ((uint32_t)hasbit / 8)], (1 << ((uint32_t)hasbit % 8)) |
|
|| } |
|
|.endmacro |
|
|
|
|
|
#include <stdlib.h> |
|
#include "upb/pb/varint.h" |
|
|
|
static upb_func *gethandler(const upb_handlers *h, const upb_fielddef *f, |
|
upb_handlertype_t type) { |
|
return upb_handlers_gethandler(h, getselector(f, type)); |
|
} |
|
|
|
static uintptr_t gethandlerdata(const upb_handlers *h, const upb_fielddef *f, |
|
upb_handlertype_t type) { |
|
return (uintptr_t)upb_handlers_gethandlerdata(h, getselector(f, type)); |
|
} |
|
|
|
// Emits a fresh pclabel at the current code position and records a
// printf-style formatted name for it in plan->asmlabels (label -> string).
// The string is produced by upb_vrprintf() and stays owned by the table.
static void asmlabel(decoderplan *plan, const char *fmt, ...) {
  char *name = NULL;
  size_t name_size = 0;

  va_list args;
  va_start(args, fmt);
  upb_vrprintf(&name, &name_size, 0, fmt, args);
  va_end(args);

  // Take the next unused pclabel and make room for it in DynASM.
  const uint32_t pc = plan->pclabel_count++;
  dasm_growpc(plan, plan->pclabel_count);
  |=>pc:
  upb_inttable_insert(&plan->asmlabels, pc, upb_value_ptr(name));
}
|
|
|
// Decodes the next val into ARG2, advances PTR. |
|
static void upb_decoderplan_jit_decodefield(decoderplan *plan, |
|
size_t tag_size, |
|
const upb_handlers *h, |
|
const upb_fielddef *f) { |
|
// Decode the value into arg 3 for the callback. |
|
asmlabel(plan, "UPB_MCODE_DECODE_FIELD_%s.%s", |
|
upb_msgdef_fullname(upb_handlers_msgdef(h)), |
|
upb_fielddef_name(f)); |
|
switch (upb_fielddef_descriptortype(f)) { |
|
case UPB_DESCRIPTOR_TYPE_DOUBLE: |
|
| movsd XMMARG1, qword [PTR + tag_size] |
|
| add PTR, 8 + tag_size |
|
break; |
|
|
|
case UPB_DESCRIPTOR_TYPE_FIXED64: |
|
case UPB_DESCRIPTOR_TYPE_SFIXED64: |
|
| mov ARG2_64, qword [PTR + tag_size] |
|
| add PTR, 8 + tag_size |
|
break; |
|
|
|
case UPB_DESCRIPTOR_TYPE_FLOAT: |
|
| movss XMMARG1, dword [PTR + tag_size] |
|
| add PTR, 4 + tag_size |
|
break; |
|
|
|
case UPB_DESCRIPTOR_TYPE_FIXED32: |
|
case UPB_DESCRIPTOR_TYPE_SFIXED32: |
|
| mov ARG2_32, dword [PTR + tag_size] |
|
| add PTR, 4 + tag_size |
|
break; |
|
|
|
case UPB_DESCRIPTOR_TYPE_BOOL: |
|
// Can't assume it's one byte long, because bool must be wire-compatible |
|
// with all of the varint integer types. |
|
| decode_varint tag_size |
|
| test ARG2_64, ARG2_64 |
|
| setne al |
|
| movzx ARG2_32, al |
|
break; |
|
|
|
case UPB_DESCRIPTOR_TYPE_INT64: |
|
case UPB_DESCRIPTOR_TYPE_UINT64: |
|
case UPB_DESCRIPTOR_TYPE_INT32: |
|
case UPB_DESCRIPTOR_TYPE_UINT32: |
|
case UPB_DESCRIPTOR_TYPE_ENUM: |
|
| decode_varint tag_size |
|
break; |
|
|
|
case UPB_DESCRIPTOR_TYPE_SINT64: |
|
// 64-bit zig-zag decoding. |
|
| decode_varint tag_size |
|
| mov rax, ARG2_64 |
|
| shr ARG2_64, 1 |
|
| and rax, 1 |
|
| neg rax |
|
| xor ARG2_64, rax |
|
break; |
|
|
|
case UPB_DESCRIPTOR_TYPE_SINT32: |
|
// 32-bit zig-zag decoding. |
|
| decode_varint tag_size |
|
| mov eax, ARG2_32 |
|
| shr ARG2_32, 1 |
|
| and eax, 1 |
|
| neg eax |
|
| xor ARG2_32, eax |
|
break; |
|
|
|
case UPB_DESCRIPTOR_TYPE_STRING: |
|
case UPB_DESCRIPTOR_TYPE_BYTES: { |
|
// We only handle the case where the entire string is in our current |
|
// buf, which sidesteps any security problems. The C path has more |
|
// robust checks. |
|
| mov ecx, dword [PTR + tag_size] |
|
| decode_loaded_varint tag_size |
|
| mov rdi, DECODER->end |
|
| sub rdi, rax |
|
| cmp ARG2_64, rdi // if (len > d->end - str) |
|
| ja ->exit_jit // Can't deliver, whole string not in buf. |
|
| mov PTR, rax |
|
|
|
upb_func *handler = gethandler(h, f, UPB_HANDLER_STARTSTR); |
|
if (handler) { |
|
// void* startstr(void *c, const void *hd, size_t hint) |
|
| mov DECODER->tmp_len, ARG2_32 |
|
| mov ARG1_64, CLOSURE |
|
| mov ARG3_64, ARG2_64 |
|
| load_handler_data h, f, UPB_HANDLER_STARTSTR |
|
| callp handler |
|
| check_ptr_ret |
|
| mov ARG1_64, rax // sub-closure |
|
| mov ARG4_32, DECODER->tmp_len |
|
} else { |
|
| mov ARG1_64, CLOSURE |
|
| mov ARG4_64, ARG2_64 |
|
} |
|
|
|
handler = gethandler(h, f, UPB_HANDLER_STRING); |
|
if (handler) { |
|
// size_t str(void *c, const void *hd, const char *buf, size_t len) |
|
| load_handler_data h, f, UPB_HANDLER_STRING |
|
| mov ARG3_64, PTR |
|
| callp handler |
|
// TODO: properly handle returns other than "n" (the whole string). |
|
| add PTR, rax |
|
} else { |
|
| add PTR, ARG4_64 |
|
} |
|
|
|
handler = gethandler(h, f, UPB_HANDLER_ENDSTR); |
|
if (handler) { |
|
// bool endstr(const upb_sinkframe *frame); |
|
| mov ARG1_64, CLOSURE |
|
| load_handler_data h, f, UPB_HANDLER_ENDSTR |
|
| callp handler |
|
| check_bool_ret |
|
} |
|
break; |
|
} |
|
|
|
// Will dispatch callbacks and call submessage in a second. |
|
case UPB_DESCRIPTOR_TYPE_MESSAGE: |
|
| decode_varint tag_size |
|
break; |
|
case UPB_DESCRIPTOR_TYPE_GROUP: |
|
| add PTR, tag_size |
|
break; |
|
|
|
default: abort(); |
|
} |
|
} |
|
|
|
static void upb_decoderplan_jit_callcb(decoderplan *plan, |
|
const upb_handlers *h, |
|
const upb_fielddef *f) { |
|
// Call callbacks. Specializing the append accessors didn't yield a speed |
|
// increase in benchmarks. |
|
asmlabel(plan, "UPB_MCODE_CALLCB_%s.%s", |
|
upb_msgdef_fullname(upb_handlers_msgdef(h)), |
|
upb_fielddef_name(f)); |
|
if (upb_fielddef_issubmsg(f)) { |
|
// Call startsubmsg handler (if any). |
|
upb_func *startsubmsg = gethandler(h, f, UPB_HANDLER_STARTSUBMSG); |
|
if (startsubmsg) { |
|
// upb_sflow_t startsubmsg(const upb_sinkframe *frame) |
|
| mov DECODER->tmp_len, ARG2_32 |
|
| mov ARG1_64, CLOSURE |
|
| load_handler_data h, f, UPB_HANDLER_STARTSUBMSG |
|
| callp startsubmsg |
|
| check_ptr_ret |
|
| mov CLOSURE, rax |
|
} |
|
|
|
const upb_handlers *sub_h = upb_handlers_getsubhandlers(h, f); |
|
if (sub_h) { |
|
if (upb_fielddef_istagdelim(f)) { |
|
| mov rdx, UPB_NONDELIMITED |
|
} else { |
|
| mov esi, DECODER->tmp_len |
|
| mov rdx, PTR |
|
| sub rdx, DECODER->buf |
|
| add rdx, DECODER->bufstart_ofs |
|
| add rdx, rsi // = d->bufstart_ofs + (d->ptr - d->buf) + delim_len |
|
} |
|
| pushframe sub_h, f, rdx, UPB_HANDLER_ENDSUBMSG |
|
| call =>upb_getpclabel(plan, sub_h, STARTMSG) |
|
| popframe |
|
} else { |
|
if (upb_fielddef_istagdelim(f)) { |
|
// Groups with no handlers not supported yet. |
|
assert(false); |
|
} else { |
|
| mov esi, DECODER->tmp_len |
|
| add PTR, rsi |
|
} |
|
} |
|
|
|
// Call endsubmsg handler (if any). |
|
upb_func *endsubmsg = gethandler(h, f, UPB_HANDLER_ENDSUBMSG); |
|
if (endsubmsg) { |
|
// upb_flow_t endsubmsg(void *closure, upb_value fval); |
|
| mov ARG1_64, CLOSURE |
|
| load_handler_data h, f, UPB_HANDLER_ENDSUBMSG |
|
| callp endsubmsg |
|
| check_bool_ret |
|
} |
|
} else if (!upb_fielddef_isstring(f)) { |
|
upb_handlertype_t handlertype = upb_handlers_getprimitivehandlertype(f); |
|
upb_selector_t sel = getselector(f, handlertype); |
|
upb_func *handler = gethandler(h, f, handlertype); |
|
const upb_shim_data *data = upb_shim_getdata(h, sel); |
|
if (data) { |
|
switch (upb_fielddef_type(f)) { |
|
case UPB_TYPE_INT64: |
|
case UPB_TYPE_UINT64: |
|
| mov [CLOSURE + data->offset], ARG2_64 |
|
break; |
|
case UPB_TYPE_INT32: |
|
case UPB_TYPE_UINT32: |
|
case UPB_TYPE_ENUM: |
|
| mov [CLOSURE + data->offset], ARG2_32 |
|
break; |
|
case UPB_TYPE_DOUBLE: |
|
| movsd qword [CLOSURE + data->offset], XMMARG1 |
|
break; |
|
case UPB_TYPE_FLOAT: |
|
| movss dword [CLOSURE + data->offset], XMMARG1 |
|
break; |
|
case UPB_TYPE_BOOL: |
|
| mov [CLOSURE + data->offset], ARG2_8 |
|
break; |
|
case UPB_TYPE_STRING: |
|
case UPB_TYPE_BYTES: |
|
case UPB_TYPE_MESSAGE: |
|
assert(false); break; |
|
} |
|
| sethas CLOSURE, data->hasbit |
|
} else if (handler) { |
|
// bool value(const upb_sinkframe* frame, ctype val) |
|
| mov ARG1_64, CLOSURE |
|
| mov ARG3_64, ARG2_64 |
|
| load_handler_data h, f, handlertype |
|
| callp handler |
|
| check_bool_ret |
|
} |
|
} |
|
} |
|
|
|
static uint64_t upb_get_encoded_tag(const upb_fielddef *f) { |
|
uint32_t tag = (upb_fielddef_number(f) << 3) | |
|
upb_decoder_types[upb_fielddef_descriptortype(f)].native_wire_type; |
|
uint64_t encoded_tag = upb_vencode32(tag); |
|
// No tag should be greater than 5 bytes. |
|
assert(encoded_tag <= 0xffffffffff); |
|
return encoded_tag; |
|
} |
|
|
|
static void upb_decoderplan_jit_endseq(decoderplan *plan, |
|
const upb_handlers *h, |
|
const upb_fielddef *f) { |
|
| popframe |
|
upb_func *endseq = gethandler(h, f, UPB_HANDLER_ENDSEQ); |
|
if (endseq) { |
|
| mov ARG1_64, CLOSURE |
|
| load_handler_data h, f, UPB_HANDLER_ENDSEQ |
|
| callp endseq |
|
} |
|
} |
|
|
|
// PTR should point to the beginning of the tag. |
|
static void upb_decoderplan_jit_field(decoderplan *plan, |
|
const upb_handlers *h, |
|
const upb_fielddef *f, |
|
const upb_fielddef *next_f) { |
|
asmlabel(plan, "UPB_MCODE_FIELD_%s.%s", |
|
upb_msgdef_fullname(upb_handlers_msgdef(h)), |
|
upb_fielddef_name(f)); |
|
uint64_t tag = upb_get_encoded_tag(f); |
|
uint64_t next_tag = next_f ? upb_get_encoded_tag(next_f) : 0; |
|
int tag_size = upb_value_size(tag); |
|
|
|
// PC-label for the dispatch table. |
|
// We check the wire type (which must be loaded in edi) because the |
|
// table is keyed on field number, not type. |
|
|=>upb_getpclabel(plan, f, FIELD): |
|
| cmp esi, (tag & 0x7) |
|
| jne ->exit_jit // In the future: could be an unknown field or packed. |
|
|=>upb_getpclabel(plan, f, FIELD_NO_TYPECHECK): |
|
if (upb_fielddef_isseq(f)) { |
|
upb_func *startseq = gethandler(h, f, UPB_HANDLER_STARTSEQ); |
|
if (startseq) { |
|
| mov ARG1_64, CLOSURE |
|
| load_handler_data h, f, UPB_HANDLER_STARTSEQ |
|
| callp startseq |
|
| check_ptr_ret |
|
| mov CLOSURE, rax |
|
} |
|
| mov rsi, FRAME->end_ofs |
|
| pushframe h, f, rsi, UPB_HANDLER_ENDSEQ |
|
} |
|
|
|
|1: // Label for repeating this field. |
|
|
|
upb_decoderplan_jit_decodefield(plan, tag_size, h, f); |
|
upb_decoderplan_jit_callcb(plan, h, f); |
|
|
|
// This is kind of gross; future redesign should take into account how to |
|
// make this work nicely. The difficult part is that the sequence can be |
|
// broken either by end-of-message or by seeing a different field; in both |
|
// cases we need to call the endseq handler, but what we do after that |
|
// depends on which case triggered the end-of-sequence. |
|
| mov DECODER->ptr, PTR |
|
| cmp PTR, DECODER->jit_end |
|
| jae ->exit_jit |
|
| cmp PTR, DECODER->effective_end |
|
| jb >2 |
|
if (upb_fielddef_isseq(f)) { |
|
upb_decoderplan_jit_endseq(plan, h, f); |
|
} |
|
| jmp =>upb_getpclabel(plan, h, ENDOFMSG) |
|
|2: |
|
| mov rcx, qword [PTR] |
|
if (upb_fielddef_isseq(f)) { |
|
| checktag tag |
|
| je <1 |
|
upb_decoderplan_jit_endseq(plan, h, f); |
|
// Load next tag again (popframe/endseq clobbered it). |
|
| mov rcx, qword [PTR] |
|
} |
|
|
|
if (next_tag != 0) { |
|
| checktag next_tag |
|
| je =>upb_getpclabel(plan, next_f, FIELD_NO_TYPECHECK) |
|
} |
|
|
|
// Fall back to dynamic dispatch. |
|
| dyndispatch h |
|
} |
|
|
|
// qsort() comparator for uint32_t elements, ascending.
// Returns -1, 0 or 1.  The values are compared directly instead of being
// subtracted: an unsigned difference converted to int has the wrong sign
// whenever the two values are more than INT_MAX apart.
static int upb_compare_uint32(const void *a, const void *b) {
  const uint32_t lhs = *(const uint32_t*)a;
  const uint32_t rhs = *(const uint32_t*)b;
  return (lhs > rhs) - (lhs < rhs);
}
|
|
|
static void upb_decoderplan_jit_msg(decoderplan *plan, |
|
const upb_handlers *h) { |
|
asmlabel(plan, "UPB_MCODE_DECODEMSG_%s", |
|
upb_msgdef_fullname(upb_handlers_msgdef(h))); |
|
|=>upb_getpclabel(plan, h, AFTER_STARTMSG): |
|
| push rbp |
|
| mov rbp, rsp |
|
| jmp >1 |
|
|
|
|=>upb_getpclabel(plan, h, STARTMSG): |
|
| push rbp |
|
| mov rbp, rsp |
|
|
|
// Call startmsg handler (if any): |
|
upb_func *startmsg = upb_handlers_gethandler(h, UPB_STARTMSG_SELECTOR); |
|
if (startmsg) { |
|
// upb_flow_t startmsg(void *closure, const void *hd); |
|
| mov ARG1_64, CLOSURE |
|
| loadarg2 upb_handlers_gethandlerdata(h, UPB_STARTMSG_SELECTOR) |
|
| callp startmsg |
|
| check_bool_ret |
|
} |
|
|
|
|1: |
|
| setmsgend |
|
| checkpoint h |
|
| mov ecx, dword [PTR] |
|
| dyndispatch_ h |
|
|
|
// --------- New code section (does not fall through) ------------------------ |
|
|
|
// Emit code for parsing each field (dynamic dispatch contains pointers to |
|
// all of these). |
|
|
|
// Create an ordering over the fields in field number order. |
|
// Parsing will theoretically be fastest if we emit code in the same |
|
// order as field numbers are seen on-the-wire because of an optimization |
|
// in the generated code that skips dynamic dispatch if the next field is |
|
// as expected. |
|
const upb_msgdef *md = upb_handlers_msgdef(h); |
|
int num_keys = upb_msgdef_numfields(md); |
|
uint32_t *keys = malloc(num_keys * sizeof(*keys)); |
|
int idx = 0; |
|
upb_msg_iter i; |
|
for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) { |
|
keys[idx++] = upb_fielddef_number(upb_msg_iter_field(&i)); |
|
} |
|
qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32); |
|
|
|
for(int i = 0; i < num_keys; i++) { |
|
const upb_fielddef *f = upb_msgdef_itof(md, keys[i]); |
|
const upb_fielddef *next_f = |
|
(i + 1 < num_keys) ? upb_msgdef_itof(md, keys[i + 1]) : NULL; |
|
upb_decoderplan_jit_field(plan, h, f, next_f); |
|
} |
|
|
|
free(keys); |
|
|
|
// --------- New code section (does not fall through) ------------------------ |
|
|
|
// End-of-buf / end-of-message. |
|
// We hit a buffer limit; either we hit jit_end or end-of-submessage. |
|
|=>upb_getpclabel(plan, h, ENDOFBUF): |
|
| cmp PTR, DECODER->jit_end |
|
| jae ->exit_jit |
|
|
|
|=>upb_getpclabel(plan, h, ENDOFMSG): |
|
// We are at end-of-submsg: call endmsg handler (if any): |
|
upb_func *endmsg = upb_handlers_gethandler(h, UPB_ENDMSG_SELECTOR); |
|
if (endmsg) { |
|
// void endmsg(void *closure, const void *hd, upb_status *status) { |
|
| mov ARG1_64, CLOSURE |
|
| loadarg2 upb_handlers_gethandlerdata(h, UPB_ENDMSG_SELECTOR) |
|
| mov ARG3_64, DECODER->sink |
|
| mov ARG3_64, SINK:ARG3_64->pipeline_ |
|
| add ARG3_64, offsetof(upb_pipeline, status_) |
|
| callp endmsg |
|
} |
|
|
|
| leave |
|
| ret |
|
} |
|
|
|
static void upb_decoderplan_jit(decoderplan *plan) { |
|
// The JIT prologue/epilogue trampoline that is generated in this function |
|
// does not depend on the handlers, so it will never vary. Ideally we would |
|
// put it in an object file and just link it into upb so we could have only a |
|
// single copy of it instead of one copy for each decoderplan. But our |
|
// options for doing that are undesirable: GCC inline assembly is |
|
// complicated, not portable to other compilers, and comes with subtle |
|
// caveats about incorrect things what the optimizer might do if you eg. |
|
// execute non-local jumps. Putting this code in a .s file would force us to |
|
// calculate the structure offsets ourself instead of symbolically |
|
// (ie. [r15 + 0xcd] instead of DECODER->ptr). So we tolerate a bit of |
|
// unnecessary duplication/redundancy. |
|
asmlabel(plan, "upb_jit_trampoline"); |
|
| push rbp |
|
| mov rbp, rsp |
|
| push r15 |
|
| push r14 |
|
| push r13 |
|
| push r12 |
|
| push rbx |
|
// Align stack. |
|
| sub rsp, 8 |
|
| mov DECODER, ARG1_64 |
|
| mov DECODER->saved_rbp, rbp |
|
| mov FRAME, DECODER:ARG1_64->top |
|
| mov rax, DECODER:ARG1_64->sink |
|
| mov SINKFRAME, SINK:rax->top |
|
| mov CLOSURE, SINKFRAME->closure |
|
| mov PTR, DECODER->ptr |
|
|
|
// TODO: push return addresses for re-entry (will be necessary for multiple |
|
// buffer support). |
|
| call ARG2_64 |
|
asmlabel(plan, "exitjit"); |
|
|->exit_jit: |
|
| mov rbp, DECODER->saved_rbp |
|
| lea rsp, [rbp - 48] |
|
// Counter previous alignment. |
|
| add rsp, 8 |
|
| pop rbx |
|
| pop r12 |
|
| pop r13 |
|
| pop r14 |
|
| pop r15 |
|
| leave |
|
| ret |
|
|
|
upb_inttable_iter i; |
|
upb_inttable_begin(&i, &plan->msginfo); |
|
for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { |
|
const upb_handlers *h = (const upb_handlers*)upb_inttable_iter_key(&i); |
|
upb_decoderplan_jit_msg(plan, h); |
|
} |
|
} |
|
|
|
static void upb_decoderplan_jit_assignpclabels(decoderplan *plan, |
|
const upb_handlers *h) { |
|
// Limit the DFS. |
|
if (upb_inttable_lookupptr(&plan->pclabels, h, NULL)) return; |
|
|
|
upb_inttable_insertptr(&plan->pclabels, h, |
|
upb_value_uint32(plan->pclabel_count)); |
|
plan->pclabel_count += TOTAL_MSG_PCLABELS; |
|
|
|
upb_jitmsginfo *info = malloc(sizeof(*info)); |
|
info->max_field_number = 0; |
|
upb_inttable_insertptr(&plan->msginfo, h, upb_value_ptr(info)); |
|
|
|
upb_msg_iter i; |
|
upb_msg_begin(&i, upb_handlers_msgdef(h)); |
|
for(; !upb_msg_done(&i); upb_msg_next(&i)) { |
|
const upb_fielddef *f = upb_msg_iter_field(&i); |
|
info->max_field_number = |
|
UPB_MAX(info->max_field_number, upb_fielddef_number(f)); |
|
upb_inttable_insertptr(&plan->pclabels, f, |
|
upb_value_uint32(plan->pclabel_count)); |
|
plan->pclabel_count += TOTAL_FIELD_PCLABELS; |
|
|
|
// Discover the whole graph of handlers depth-first. We will probably |
|
// revise this later to be more explicit about the list of handlers that |
|
// the plan should include. |
|
if (upb_fielddef_issubmsg(f)) { |
|
const upb_handlers *subh = upb_handlers_getsubhandlers(h, f); |
|
if (subh) upb_decoderplan_jit_assignpclabels(plan, subh); |
|
} |
|
} |
|
// TODO: support large field numbers by either using a hash table or |
|
// generating code for a binary search. For now large field numbers |
|
// will just fall back to the table decoder. |
|
info->max_field_number = UPB_MIN(info->max_field_number, 16000); |
|
info->tablearray = malloc((info->max_field_number + 1) * sizeof(void*)); |
|
} |
|
|
|
// Fills in, for every message in plan->msginfo, the JIT entry point
// (jit_func) and the dispatch table indexed by field number.  Must run after
// dasm_encode() and while plan->pclabels is still initialized.
static void upb_decoderplan_jit_filldispatch(decoderplan *plan,
                                             void **globals) {
  upb_inttable_iter i;
  upb_inttable_begin(&i, &plan->msginfo);
  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    const upb_handlers *h = (const upb_handlers*)upb_inttable_iter_key(&i);
    upb_jitmsginfo *mi = upb_getmsginfo(plan, h);
    // We jump to after the startmsg handler since it is called before entering
    // the JIT (either by upb_pbdecoder or by a previous call to the JIT).
    mi->jit_func = plan->jit_code +
        dasm_getpclabel(plan, upb_getpclabel(plan, h, AFTER_STARTMSG));
    for (uint32_t j = 0; j <= mi->max_field_number; j++) {
      const upb_fielddef *f = upb_msgdef_itof(upb_handlers_msgdef(h), j);
      if (f) {
        mi->tablearray[j] = plan->jit_code +
            dasm_getpclabel(plan, upb_getpclabel(plan, f, FIELD));
      } else {
        // TODO: extend the JIT to handle unknown fields.
        // For the moment we exit the JIT for any unknown field.
        mi->tablearray[j] = globals[UPB_JIT_GLOBAL_exit_jit];
      }
    }
  }
}

#ifndef NDEBUG
// Dumps the generated machine code to /tmp/upb-jit-code.s as ".byte"
// directives, with one global symbol per recorded asm label, for easy
// inspection.
static void upb_decoderplan_jit_dumpcode(decoderplan *plan) {
  // Convert all asm labels from pclabel offsets to machine code offsets.
  upb_inttable mclabels;
  upb_inttable_init(&mclabels, UPB_CTYPE_PTR);
  upb_inttable_iter i;
  upb_inttable_begin(&i, &plan->asmlabels);
  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    upb_inttable_insert(
        &mclabels,
        dasm_getpclabel(plan, upb_inttable_iter_key(&i)),
        upb_inttable_iter_value(&i));
  }

  FILE *f = fopen("/tmp/upb-jit-code.s", "w");
  if (f) {
    fputs(" .text", f);
    size_t linelen = 0;
    for (size_t ofs = 0; ofs < plan->jit_size; ofs++) {
      upb_value v;
      if (upb_inttable_lookup(&mclabels, ofs, &v)) {
        const char *label = upb_value_getptr(v);
        fprintf(f, "\n\n_%s:\n", label);
        fprintf(f, " .globl _%s", label);
        linelen = 1000;  // Forces a new ".byte" line after a label.
      }
      // Print through unsigned char so a byte >= 0x80 comes out as 128..255
      // whatever the declared signedness of the jit_code element type.
      const unsigned byte = (unsigned char)plan->jit_code[ofs];
      if (linelen >= 77) {
        linelen = fprintf(f, "\n .byte %u", byte);
      } else {
        linelen += fprintf(f, ",%u", byte);
      }
    }
    fputs("\n", f);
    fclose(f);
  } else {
    fprintf(stderr, "Couldn't open /tmp/upb-jit-code.s for writing/\n");
  }

  upb_inttable_uninit(&mclabels);
}
#endif

// Builds the JIT code for |plan|: assigns pclabels, generates and links the
// code with DynASM, maps memory for it, encodes it, fills in the dispatch
// tables and finally makes the code read+execute only.
//
// If the scratch allocation, the mapping, or the protection change fails,
// plan->jit_code is left NULL and plan->jit_size 0, which makes
// upb_decoder_enterjit() skip the JIT.  plan->msginfo stays initialized in
// every case.
static void upb_decoderplan_makejit(decoderplan *plan) {
  upb_inttable_init(&plan->msginfo, UPB_CTYPE_PTR);
  plan->debug_info = NULL;

  // Assign pclabels.
  plan->pclabel_count = 0;
  upb_inttable_init(&plan->pclabels, UPB_CTYPE_UINT32);
  upb_decoderplan_jit_assignpclabels(plan, plan->dest_handlers);

  upb_inttable_init(&plan->asmlabels, UPB_CTYPE_PTR);

  void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals));
  if (!globals) {
    // Nothing has been generated yet; just undo the table setup.
    plan->jit_code = NULL;
    plan->jit_size = 0;
    upb_inttable_uninit(&plan->pclabels);
    upb_inttable_uninit(&plan->asmlabels);
    return;
  }
  dasm_init(plan, 1);
  dasm_setupglobal(plan, globals, UPB_JIT_GLOBAL__MAX);
  dasm_growpc(plan, plan->pclabel_count);
  dasm_setup(plan, upb_jit_actionlist);

  upb_decoderplan_jit(plan);

  int dasm_status = dasm_link(plan, &plan->jit_size);
  (void)dasm_status;
  assert(dasm_status == DASM_S_OK);

  // fd is -1 because some implementations require that for MAP_ANONYMOUS.
  void *code = mmap(NULL, plan->jit_size, PROT_READ | PROT_WRITE,
                    MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
  if (code == MAP_FAILED) {
    plan->jit_code = NULL;
    plan->jit_size = 0;
  } else {
    plan->jit_code = code;

    upb_reg_jit_gdb(plan);

    dasm_encode(plan, plan->jit_code);

    // Create dispatch tables.
    upb_decoderplan_jit_filldispatch(plan, globals);
  }

  upb_inttable_uninit(&plan->pclabels);

  if (plan->jit_code &&
      mprotect(plan->jit_code, plan->jit_size, PROT_EXEC | PROT_READ) != 0) {
    // The code cannot be made executable, so it must never be entered.
    munmap(plan->jit_code, plan->jit_size);
    plan->jit_code = NULL;
    plan->jit_size = 0;
  }

#ifndef NDEBUG
  // Dump to a .s file in /tmp, for easy inspection.
  if (plan->jit_code) upb_decoderplan_jit_dumpcode(plan);
#endif

  // The label names were only needed for the dump above.
  upb_inttable_iter i;
  upb_inttable_begin(&i, &plan->asmlabels);
  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    free(upb_value_getptr(upb_inttable_iter_value(&i)));
  }
  upb_inttable_uninit(&plan->asmlabels);

  dasm_free(plan);
  free(globals);
}
|
|
|
// Releases everything the JIT attached to |plan|: each message's dispatch
// table and upb_jitmsginfo, the msginfo table itself, the mapped code and the
// GDB debug image.
static void upb_decoderplan_freejit(decoderplan *plan) {
  upb_inttable_iter it;
  upb_inttable_begin(&it, &plan->msginfo);
  while (!upb_inttable_done(&it)) {
    upb_jitmsginfo *info = upb_value_getptr(upb_inttable_iter_value(&it));
    free(info->tablearray);
    free(info);
    upb_inttable_next(&it);
  }
  upb_inttable_uninit(&plan->msginfo);

  munmap(plan->jit_code, plan->jit_size);
  free(plan->debug_info);
  // TODO: unregister
}
|
|
|
// Runs the JIT-ted decoder on |d| if the plan has JIT code and the decoder is
// at the top level (both the decoder stack and the sink stack are at their
// base) with d->ptr set and before d->jit_end.  Otherwise does nothing.
static void upb_decoder_enterjit(upb_pbdecoder *d, const decoderplan *plan) {
  if (!plan->jit_code) return;
  if (d->top != d->stack) return;
  if (d->sink->top != d->sink->stack) return;
  if (!d->ptr || d->ptr >= d->jit_end) return;

#ifndef NDEBUG
  // Sentinel values, checked after the call below.
  register uint64_t rbx asm ("rbx") = 11;
  register uint64_t r12 asm ("r12") = 12;
  register uint64_t r13 asm ("r13") = 13;
  register uint64_t r14 asm ("r14") = 14;
  register uint64_t r15 asm ("r15") = 15;
#endif
  // Decodes as many fields as possible, updating d->ptr appropriately,
  // before falling through to the slow(er) path.
  void (*upb_jit_decode)(upb_pbdecoder *d, void*) = (void*)plan->jit_code;
  upb_jitmsginfo *mi = upb_getmsginfo(plan, plan->dest_handlers);
  assert(mi);
  upb_jit_decode(d, mi->jit_func);
  assert(d->ptr <= d->end);

  // Test that callee-save registers were properly restored.
  assert(rbx == 11);
  assert(r12 == 12);
  assert(r13 == 13);
  assert(r14 == 14);
  assert(r15 == 15);
}
|
|
|