Protocol Buffers - Google's data interchange format (grpc依赖)
https://developers.google.com/protocol-buffers/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
817 lines
24 KiB
817 lines
24 KiB
|//
|// upb - a minimalist implementation of protocol buffers.
|//
|// Copyright (c) 2011 Google Inc. See LICENSE for details.
|// Author: Josh Haberman <jhaberman@gmail.com>
|//
|// JIT compiler for upb_decoder on x86. Given a upb_handlers object,
|// generates code specialized to parsing the specific message and
|// calling specific handlers.
|//
|// Since the JIT can call other functions (the JIT'ted code is not a leaf
|// function) we must respect alignment rules. On OS X, this means aligning
|// the stack to 16 bytes.
|
|
|
// Special values stored in upb_mhandlers.jit_parent_field_done_pclabel in
// place of a real pclabel:
//   UPB_NONE         - initial value; no parent field has claimed the message.
//   UPB_MULTIPLE     - more than one submessage field refers to the message.
//   UPB_TOPLEVEL_ONE - assigned to msgs[0] when it is still UPB_NONE.
// The values are parenthesized so they expand safely inside any expression.
#define UPB_NONE (-1)
#define UPB_MULTIPLE (-2)
#define UPB_TOPLEVEL_ONE (-3)
|
|
|
#include <sys/mman.h> |
|
#include "dynasm/dasm_proto.h" |
|
#include "dynasm/dasm_x86.h" |
|
|
|
// Fall back to the MAP_ANON spelling when MAP_ANONYMOUS is not provided.
#ifndef MAP_ANONYMOUS
# define MAP_ANONYMOUS MAP_ANON
#endif

// We map into the low 32 bits when we can, but if this is not available
// (like on OS X) we take what we can get. It's not required for correctness,
// it's just a performance thing that makes it more likely that our jumps
// can be rel32 (i.e. within 32-bits of our pc) instead of the longer
// sequence required for other jumps (see callp).
#ifndef MAP_32BIT
#define MAP_32BIT 0
#endif
|
|
|
// To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code
// at runtime. GDB 7.x+ has defined an interface for doing this, and these
// structure/function definitions are copied out of gdb/jit.h
//
// We need to give GDB an ELF file at runtime describing the symbols we have
// generated. To avoid implementing the ELF format, we generate an ELF file
// at compile-time and compile it in as a character string. We can replace
// a few key constants (address of JIT-ted function and its size) by looking
// for a few magic numbers and doing a dumb string replacement.
|
|
|
#ifndef __APPLE__ |
|
#include "upb/pb/jit_debug_elf_file.h" |
|
|
|
// Values for gdb_jit_descriptor.action_flag.
typedef enum {
  GDB_JIT_NOACTION = 0,
  GDB_JIT_REGISTER,
  GDB_JIT_UNREGISTER
} jit_actions_t;

// One node of the doubly-linked list of registered symbol files.
typedef struct gdb_jit_entry {
  struct gdb_jit_entry *next_entry;
  struct gdb_jit_entry *prev_entry;
  const char *symfile_addr;  // Start of the in-memory symbol file.
  uint64_t symfile_size;     // Size of that symbol file in bytes.
} gdb_jit_entry;

// Root object describing the list of registered entries and the pending action.
typedef struct {
  uint32_t version;
  uint32_t action_flag;           // One of jit_actions_t.
  gdb_jit_entry *relevant_entry;  // Entry the pending action applies to.
  gdb_jit_entry *first_entry;     // Head of the entry list.
} gdb_jit_descriptor;

// The single descriptor instance: version 1, no pending action, empty list.
gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL};

// Hook called after updating the descriptor.  It is kept out-of-line and its
// body is an empty asm statement so the call is not optimized away.
void __attribute__((noinline)) __jit_debug_register_code(void) { __asm__ __volatile__(""); }
|
|
|
// Registers the JIT-ted code of "d" with GDB.
//
// Makes a private copy of the precompiled ELF image, patches the placeholder
// constants in it with the address and size of d's JIT code, links a new
// entry holding that image at the head of __jit_debug_descriptor's list and
// notifies the debugger.  d->debug_info owns the patched image.
//
// Registration is only a debugging aid, so if memory cannot be allocated it
// is skipped and d->debug_info is left NULL.
void upb_reg_jit_gdb(upb_decoder *d) {
  // Create debug info.
  size_t elf_len = upb_pb_jit_debug_elf_file_o_len;
  void *elf = malloc(elf_len);
  gdb_jit_entry *e = malloc(sizeof(*e));
  if (!elf || !e) {
    free(elf);
    free(e);
    d->debug_info = NULL;
    return;
  }
  memcpy(elf, upb_pb_jit_debug_elf_file_o, elf_len);

  // Walk the image in 8-byte steps and substitute the magic placeholders.
  // Words are read and written with memcpy so the buffer is never accessed
  // through an incompatible pointer type.  Each word is patched at most once.
  char *bytes = elf;
  for (size_t ofs = 0; ofs + sizeof(uint64_t) <= elf_len;
       ofs += sizeof(uint64_t)) {
    uint64_t word;
    memcpy(&word, bytes + ofs, sizeof(word));
    if (word == 0x12345678) {
      word = (uintptr_t)d->jit_code;
    } else if (word == 0x321) {
      word = d->jit_size;
    } else {
      continue;
    }
    memcpy(bytes + ofs, &word, sizeof(word));
  }
  d->debug_info = elf;

  // Register the JIT-ted code with GDB.
  e->next_entry = __jit_debug_descriptor.first_entry;
  e->prev_entry = NULL;
  if (e->next_entry) e->next_entry->prev_entry = e;
  e->symfile_addr = d->debug_info;
  e->symfile_size = elf_len;
  __jit_debug_descriptor.first_entry = e;
  __jit_debug_descriptor.relevant_entry = e;
  __jit_debug_descriptor.action_flag = GDB_JIT_REGISTER;
  __jit_debug_register_code();
}
|
|
|
#else |
|
|
|
// Variant compiled when __APPLE__ is defined: does nothing.
void upb_reg_jit_gdb(upb_decoder *d) {
  (void)d;
}
|
|
|
#endif |
|
|
|
|// DynASM setup: target architecture and the names of the generated
|// action list and global-label tables.
|.arch x64
|.actionlist upb_jit_actionlist
|.globals UPB_JIT_GLOBAL_
|.globalnames upb_jit_globalnames
|
|// Calling conventions. Note -- this will need to be changed for
|// Windows, which uses a different calling convention!
|.define ARG1_64, rdi
|.define ARG2_8, sil
|.define ARG2_32, esi
|.define ARG2_64, rsi
|.define ARG3_8, dl
|.define ARG3_32, edx
|.define ARG3_64, rdx
|.define ARG4_64, rcx
|.define ARG5_32, r8d
|
|// Register allocation / type map.
|// ALL of the code in this file uses these register allocations.
|// When we "call" within this file, we do not use regular calling
|// conventions, but of course when calling to user callbacks we must.
|.define PTR, rbx
|.define CLOSURE, r12
|.type FRAME, upb_dispatcher_frame, r13
|.type STRREF, upb_strref, r14
|.type DECODER, upb_decoder, r15
|.type STDARRAY, upb_stdarray
|
| |
|
|// Calls the C function at "addr".  Addresses below 0xffffffff use the direct
|// call form; anything else is loaded into rax (clobbered) and called
|// indirectly.
|// NOTE(review): the test looks only at addr itself, not at its distance from
|// the JIT-ted code -- confirm the direct form is always reachable when the
|// code is not mapped low (see the MAP_32BIT comment).
|.macro callp, addr
|| if ((uintptr_t)addr < 0xffffffff) {
|  call &addr
|| } else {
|  mov64 rax, (uintptr_t)addr
|  call rax
|| }
|.endmacro
|
| |
|
|// Checks PTR for end-of-buffer.
|// When PTR >= DECODER->effective_end, groups leave the JIT directly while
|// other messages branch to their end-of-buffer label.
|.macro check_eob, m
|  cmp PTR, DECODER->effective_end
|| if (m->is_group) {
|  jae ->exit_jit
|| } else {
|  jae =>m->jit_endofbuf_pclabel
|| }
|.endmacro
|
| |
|
|// Decodes varint from [PTR + offset] -> ARG3.
|// Saves new pointer as rax.
|// The first bytes of the varint must already be loaded in ecx.  esi is
|// clobbered for 2-byte values; longer values are handed to
|// upb_vdecode_max8_fast, and a zero rax afterwards leaves the JIT.
|.macro decode_loaded_varint, offset
|  // Check for <=2 bytes inline, otherwise jump to 2-10 byte decoder.
|  lea rax, [PTR + offset + 1]
|  mov ARG3_32, ecx
|  and ARG3_32, 0x7f
|  test cl, cl
|  jns >9
|  lea rax, [PTR + offset + 2]
|  movzx esi, ch
|  and esi, 0x7f
|  shl esi, 7
|  or ARG3_32, esi
|  test cx, cx
|  jns >9
|  mov ARG1_64, rax
|  mov ARG2_32, ARG3_32
|  callp upb_vdecode_max8_fast
|  test rax, rax
|  jz ->exit_jit  // >10-byte varint.
|9:
|.endmacro
|
| |
|
|// Loads 4 bytes from [PTR + offset], decodes them as a varint into ARG3
|// and advances PTR past the varint.
|.macro decode_varint, offset
|  mov ecx, dword [PTR + offset]
|  decode_loaded_varint offset
|  mov PTR, rax
|.endmacro
|
| |
|
|// Decode the tag -> edx.
|// Could specialize this by avoiding the value masking: could just key the
|// table on the raw (length-masked) varint to save 3-4 cycles of latency.
|// Currently only support tables where all entries are in the array part.
|//
|// On the jump, ecx holds the field number and edx the wire type.  PTR is
|// not advanced.
|.macro dyndispatch_, m
|=>m->jit_dyndispatch_pclabel:
|  decode_loaded_varint, 0
|  mov ecx, edx
|  shr ecx, 3
|  and edx, 0x7
|  cmp ecx, m->max_field_number  // Bounds-check the field.
|  ja ->exit_jit  // In the future; could be unknown label
|| // The absolute form encodes the table address as a 32-bit displacement,
|| // which the CPU sign-extends, so it is only valid below 2 GiB.
|| if ((uintptr_t)m->tablearray <= 0x7fffffff) {
|  mov rax, qword [rcx*8 + m->tablearray]  // TODO: support hybrid array/hash tables.
|| } else {
|  mov64 rax, (uintptr_t)m->tablearray
|  mov rax, qword [rax + rcx*8]
|| }
|  jmp rax  // Dispatch: unpredictable jump.
|.endmacro
|
| |
|
|// Selects how "dyndispatch" expands: a full copy of dyndispatch_ at every
|// use (the enabled branch), or a jump to the message's single copy.
|.if 1
|  // Replicated dispatch: larger code, but better branch prediction.
|  .define dyndispatch, dyndispatch_
|.else
|  .macro dyndispatch, m
|    jmp =>m->jit_dyndispatch_pclabel
|  .endmacro
|.endif
|
| |
|
|// Push a stack frame (not the CPU stack, the upb_decoder stack).
|// Leaves the JIT if the new frame would reach DECODER->dispatcher.limit;
|// otherwise fills in f, end_ofs and is_sequence and makes the new frame
|// both dispatcher.top and FRAME.  Clobbers rax.
|// NOTE(review): "f" is stored with a plain mov to memory -- confirm the
|// encoding preserves pointers that do not fit in 32 bits.
|.macro pushframe, f, end_offset_, is_sequence_
|  lea rax, [FRAME + sizeof(upb_dispatcher_frame)]  // rax for shorter addressing.
|  cmp rax, qword DECODER->dispatcher.limit
|  jae ->exit_jit  // Frame stack overflow.
|  mov qword FRAME:rax->f, f
|  mov dword FRAME:rax->end_ofs, end_offset_
|  mov byte FRAME:rax->is_sequence, is_sequence_
|  mov DECODER->dispatcher.top, rax
|  mov FRAME, rax
|.endmacro
|
| |
|
|// Pops one frame off the upb_decoder stack, recomputes the end-of-message
|// pointers for message "m" and reloads CLOSURE from the now-current frame.
|.macro popframe, m
|  sub FRAME, sizeof(upb_dispatcher_frame)
|  mov DECODER->dispatcher.top, FRAME
|  setmsgend m
|  mov CLOSURE, FRAME->closure
|.endmacro
|
| |
|
|// Sets DECODER->delim_end and DECODER->effective_end for message "m".
|// Groups get delim_end = all-ones and effective_end = jit_end; other
|// messages get delim_end = buf + FRAME->end_ofs and the smaller of that and
|// jit_end as effective_end.  Clobbers rax and rsi.
|.macro setmsgend, m
|  mov rsi, DECODER->jit_end
|| if (m->is_group) {
|  mov64 rax, 0xffffffffffffffff
|  mov qword DECODER->delim_end, rax
|  mov DECODER->effective_end, rsi
|| } else {
|  // Could store a correctly-biased version in the frame, at the cost of
|  // a larger stack.
|  mov eax, dword FRAME->end_ofs
|  add rax, qword DECODER->buf
|  mov DECODER->delim_end, rax  // delim_end = d->buf + f->end_ofs
|  cmp rax, rsi
|  jb >8
|  mov rax, rsi  // effective_end = min(d->delim_end, d->jit_end)
|8:
|  mov DECODER->effective_end, rax
|| }
|.endmacro
|
| |
|
|// rcx contains the bytes at the start of the next tag; compare them against
|// "tag", but since it is a varint we must only compare as many bytes as
|// actually have data.
|// The 3- and 5-byte cases mask rcx in place, and the 5-byte case also
|// clobbers rdx.  The result is left in the flags.
|.macro checktag, tag
|| switch (upb_value_size(tag)) {
||   case 1:
|  cmp cl, tag
||     break;
||   case 2:
|  cmp cx, tag
||     break;
||   case 3:
|  and ecx, 0xffffff  // 3 bytes
|  cmp rcx, tag
||     break;  // Without this, case 4's compare was emitted as well.
||   case 4:
|  cmp ecx, tag
||     break;
||   case 5:
|  mov64 rdx, 0xffffffffff  // 5 bytes
|  and rcx, rdx
|  cmp rcx, tag
||     break;
||   default: abort();
|| }
|.endmacro
|
| |
|
|// Loads f->fval into ARG2 using the shortest form for the value: xor for
|// zero, a 32-bit mov for values below 0xffffffff, mov64 otherwise.
|// When NDEBUG is not defined, ARG3 is also overwritten with f->fval.type.
|.macro loadfval, f
||#ifndef NDEBUG
||// Since upb_value carries type information in debug mode
||// only, we need to pass the arguments slightly differently.
|  mov ARG3_32, f->fval.type
||#endif
|| if (f->fval.val.uint64 == 0) {
|  xor ARG2_32, ARG2_32
|| } else if (f->fval.val.uint64 < 0xffffffff) {
|  mov ARG2_32, f->fval.val.uint64
|| } else {
|  mov64 ARG2_64, f->fval.val.uint64
|| }
|.endmacro
|
| |
|
|// Sets bit number "hasbit" in the bytes addressed by "reg".
|// Emits nothing when hasbit is negative.
|.macro sethas, reg, hasbit
|| if (hasbit >= 0) {
|  or byte [reg + (hasbit / 8)], (1 << (hasbit % 8))
|| }
|.endmacro
|
|
|
|
|
#include <stdlib.h> |
|
#include "upb/pb/varint.h" |
|
#include "upb/msg.h" |
|
|
|
// Decodes the next val into ARG3, advances PTR. |
|
static void upb_decoder_jit_decodefield(upb_decoder *d, upb_mhandlers *m, |
|
uint8_t type, size_t tag_size) { |
|
// Decode the value into arg 3 for the callback. |
|
switch (type) { |
|
case UPB_TYPE(DOUBLE): |
|
case UPB_TYPE(FIXED64): |
|
case UPB_TYPE(SFIXED64): |
|
| mov ARG3_64, qword [PTR + tag_size] |
|
| add PTR, 8 + tag_size |
|
break; |
|
|
|
case UPB_TYPE(FLOAT): |
|
case UPB_TYPE(FIXED32): |
|
case UPB_TYPE(SFIXED32): |
|
| mov ARG3_32, dword [PTR + tag_size] |
|
| add PTR, 4 + tag_size |
|
break; |
|
|
|
case UPB_TYPE(BOOL): |
|
// Can't assume it's one byte long, because bool must be wire-compatible |
|
// with all of the varint integer types. |
|
| decode_varint tag_size |
|
| test ARG3_64, ARG3_64 |
|
| setne ARG3_8 // Other bytes left with val, should be ok. |
|
break; |
|
|
|
case UPB_TYPE(INT64): |
|
case UPB_TYPE(UINT64): |
|
case UPB_TYPE(INT32): |
|
case UPB_TYPE(UINT32): |
|
case UPB_TYPE(ENUM): |
|
| decode_varint tag_size |
|
break; |
|
|
|
case UPB_TYPE(SINT64): |
|
// 64-bit zig-zag decoding. |
|
| decode_varint tag_size |
|
| mov rax, ARG3_64 |
|
| shr ARG3_64, 1 |
|
| and rax, 1 |
|
| neg rax |
|
| xor ARG3_64, rax |
|
break; |
|
|
|
case UPB_TYPE(SINT32): |
|
// 32-bit zig-zag decoding. |
|
| decode_varint tag_size |
|
| mov eax, ARG3_32 |
|
| shr ARG3_32, 1 |
|
| and eax, 1 |
|
| neg eax |
|
| xor ARG3_32, eax |
|
break; |
|
|
|
case UPB_TYPE(STRING): |
|
case UPB_TYPE(BYTES): |
|
// We only handle the case where the entire string is in our current |
|
// buf, which sidesteps any security problems. The C path has more |
|
// robust checks. |
|
| mov ecx, dword [PTR + tag_size] |
|
| decode_loaded_varint tag_size |
|
| mov rdi, rax |
|
| add rdi, ARG3_64 |
|
| mov STRREF->len, ARG3_32 |
|
| mov STRREF->ptr, rax |
|
| sub rax, DECODER->buf |
|
| add eax, DECODER->bufstart_ofs // = d->ptr - d->buf + d->bufstart_ofs |
|
| mov STRREF->stream_offset, eax |
|
| mov ARG3_64, STRREF |
|
| cmp rdi, DECODER->effective_end |
|
| ja ->exit_jit // Can't deliver, whole string not in buf. |
|
| mov PTR, rdi |
|
break; |
|
|
|
case UPB_TYPE_ENDGROUP: // A pseudo-type. |
|
| add PTR, tag_size |
|
| jmp =>m->jit_endofmsg_pclabel |
|
return; |
|
|
|
// Will dispatch callbacks and call submessage in a second. |
|
case UPB_TYPE(MESSAGE): |
|
| decode_varint tag_size |
|
break; |
|
case UPB_TYPE(GROUP): |
|
| add PTR, tag_size |
|
break; |
|
|
|
default: abort(); |
|
} |
|
} |
|
|
|
#if 0
// These appear not to speed things up, but keeping around for
// further experimentation.
//
// Emits an inline append of the value in ARG3 to the upb_stdarray addressed
// by ARG1.  "size" is the element size in bytes (8, 4 or 1) and is also the
// stride used to index the element.  When len == size of the array the
// emitted code calls f->value instead.
static void upb_decoder_jit_doappend(upb_decoder *d, uint8_t size,
                                     upb_fhandlers *f) {
|  mov eax, STDARRAY:ARG1_64->len
|  cmp eax, STDARRAY:ARG1_64->size
|  jne >2
  // If array is full, fall back to actual function.
|  loadfval f
|  callp f->value
|  jmp >3
|2:
|  mov rcx, STDARRAY:ARG1_64->ptr
|  mov esi, eax
|  add eax, 1

  switch (size) {
    case 8:
|  mov [rcx + rsi * 8], ARG3_64
      break;

    case 4:
|  mov [rcx + rsi * 4], ARG3_32
      break;

    case 1:
      // One-byte elements are indexed with stride 1 (this was "rsi * 4").
|  mov [rcx + rsi], ARG3_8
      break;
  }

|  mov STDARRAY:ARG1_64->len, eax
|3:
}
#endif
|
|
|
// Emits the code that delivers the value just decoded for field "f".
//
// Submessage fields push a decoder frame, call the startsubmsg handler (if
// any), transfer control to the submessage's code and, at the field's
// submsg-done label, call endsubmsg (if any) and pop the frame.  Other fields
// either store ARG3 directly into the message (for the known upb_stdmsg_set*
// handlers) or call f->value, and then set the field's has-bit.
static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
  // Call callbacks.
  if (upb_issubmsgtype(f->type)) {
    if (f->type == UPB_TYPE(MESSAGE)) {
|  mov rsi, PTR
|  sub rsi, DECODER->buf
|  add esi, ARG3_32  // = (d->ptr - d->buf) + delim_len
    } else {
      assert(f->type == UPB_TYPE(GROUP));
|  mov esi, UPB_NONDELIMITED
    }
|  pushframe f, esi, false

    // Call startsubmsg handler (if any).
    if (f->startsubmsg) {
      // upb_sflow_t startsubmsg(void *closure, upb_value fval)
|  mov ARG1_64, CLOSURE
|  loadfval f
|  callp f->startsubmsg
|  mov CLOSURE, rdx
    }
|  mov qword FRAME->closure, CLOSURE

    // Submessages with a single parent are entered with a jump and jump back
    // themselves; shared ones are entered with a call and return.
    const upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f);
    if (sub_m->jit_parent_field_done_pclabel != UPB_MULTIPLE) {
|  jmp =>sub_m->jit_startmsg_pclabel;
    } else {
|  call =>sub_m->jit_startmsg_pclabel;
    }

|=>f->jit_submsg_done_pclabel:

    // Call endsubmsg handler (if any).
    if (f->endsubmsg) {
      // upb_flow_t endsubmsg(void *closure, upb_value fval);
|  mov ARG1_64, CLOSURE
|  loadfval f
|  callp f->endsubmsg
    }
|  popframe upb_fhandlers_getmsg(f)

  } else {
|  mov ARG1_64, CLOSURE
    // Test for callbacks we can specialize.
    // Can't switch() on function pointers.
    if (f->value == &upb_stdmsg_setint64 ||
        f->value == &upb_stdmsg_setuint64 ||
        f->value == &upb_stdmsg_setptr ||
        f->value == &upb_stdmsg_setdouble) {
      const upb_fielddef *fd = upb_value_getfielddef(f->fval);
|  mov [ARG1_64 + fd->offset], ARG3_64
    } else if (f->value == &upb_stdmsg_setint32 ||
               f->value == &upb_stdmsg_setuint32 ||
               f->value == &upb_stdmsg_setfloat) {
      const upb_fielddef *fd = upb_value_getfielddef(f->fval);
|  mov [ARG1_64 + fd->offset], ARG3_32
    } else if (f->value == &upb_stdmsg_setbool) {
      const upb_fielddef *fd = upb_value_getfielddef(f->fval);
|  mov [ARG1_64 + fd->offset], ARG3_8
#if 0
    // These appear not to speed things up, but keeping around for
    // further experimentation.
    } else if (f->value == &upb_stdmsg_setint64_r ||
               f->value == &upb_stdmsg_setuint64_r ||
               f->value == &upb_stdmsg_setptr_r ||
               f->value == &upb_stdmsg_setdouble_r) {
      upb_decoder_jit_doappend(d, 8, f);
    } else if (f->value == &upb_stdmsg_setint32_r ||
               f->value == &upb_stdmsg_setuint32_r ||
               f->value == &upb_stdmsg_setfloat_r) {
      upb_decoder_jit_doappend(d, 4, f);
    } else if (f->value == &upb_stdmsg_setbool_r) {
      upb_decoder_jit_doappend(d, 1, f);
#endif
    } else {
      // Load closure and fval into arg registers.
||#ifndef NDEBUG
||// Since upb_value carries type information in debug mode
||// only, we need to pass the arguments slightly differently.
|  mov ARG4_64, ARG3_64
|  mov ARG5_32, upb_types[f->type].inmemory_type
||#endif
|  loadfval f
|  callp f->value
    }
|  sethas CLOSURE, f->valuehasbit
  }
  // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
}
|
|
|
// PTR should point to the beginning of the tag. |
|
static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_tag, |
|
upb_mhandlers *m, |
|
upb_fhandlers *f, upb_fhandlers *next_f) { |
|
// PC-label for the dispatch table. |
|
// We check the wire type (which must be loaded in edx) because the |
|
// table is keyed on field number, not type. |
|
|=>f->jit_pclabel: |
|
| cmp edx, (tag & 0x7) |
|
| jne ->exit_jit // In the future: could be an unknown field or packed. |
|
|=>f->jit_pclabel_notypecheck: |
|
if (f->repeated) { |
|
| mov esi, FRAME->end_ofs |
|
| pushframe f, esi, true |
|
if (f->startseq) { |
|
| mov ARG1_64, CLOSURE |
|
| loadfval f |
|
| callp f->startseq |
|
| mov CLOSURE, rdx |
|
} |
|
| mov qword FRAME->closure, CLOSURE |
|
} |
|
|
|
|1: // Label for repeating this field. |
|
|
|
upb_decoder_jit_decodefield(d, m, f->type, upb_value_size(tag)); |
|
upb_decoder_jit_callcb(d, f); |
|
|
|
// Epilogue: load next tag, check for repeated field. |
|
| check_eob m |
|
| mov rcx, qword [PTR] |
|
if (f->repeated) { |
|
| checktag tag |
|
| je <1 |
|
if (f->endseq) { |
|
| mov ARG1_64, CLOSURE |
|
| loadfval f |
|
| callp f->endseq |
|
} |
|
| popframe m |
|
} |
|
if (next_tag != 0) { |
|
| checktag next_tag |
|
| je =>next_f->jit_pclabel_notypecheck |
|
} |
|
|
|
// Fall back to dynamic dispatch. |
|
| dyndispatch m |
|
|1: |
|
} |
|
|
|
// qsort() comparator ordering uint32_t values ascending.
// Returns -1, 0 or 1.  The values are compared rather than subtracted: the
// unsigned difference converted to int has the wrong sign whenever the two
// values are more than 2^31 apart.
static int upb_compare_uint32(const void *a, const void *b) {
  // TODO: always put ENDGROUP at the end.
  uint32_t lhs = *(const uint32_t*)a;
  uint32_t rhs = *(const uint32_t*)b;
  return (lhs > rhs) - (lhs < rhs);
}
|
|
|
// Emits all code for message "m": the start-of-message sequence (startmsg
// handler, end-of-message setup, first dispatch), one parsing routine per
// field in ascending key order, and the end-of-buffer / end-of-message
// sequence that calls the endmsg handler and returns control to the parent.
//
// A message without fields emits no field routines.  Aborts if the temporary
// key array cannot be allocated.
static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) {
|=>m->jit_startmsg_pclabel:

  if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
    // There was a call to get here, so we need to align the stack.
|  sub rsp, 8
  }
  // Call startmsg handler (if any):
  if (m->startmsg) {
    // upb_flow_t startmsg(void *closure);
|  mov ARG1_64, FRAME->closure
|  callp m->startmsg
    // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
  }

|  setmsgend m
|  check_eob m
|  mov ecx, dword [PTR]
|  dyndispatch_ m

  // --------- New code section (does not fall through) ------------------------

  // Emit code for parsing each field (dynamic dispatch contains pointers to
  // all of these).

  // Create an ordering over the fields (inttable ordering is undefined).
  int num_keys = upb_inttable_count(&m->fieldtab);
  uint32_t *keys = malloc(num_keys * sizeof(*keys));
  // malloc(0) may legitimately return NULL, so only a non-empty request counts
  // as a failure.
  if (num_keys > 0 && !keys) abort();
  int idx = 0;
  for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
      i = upb_inttable_next(&m->fieldtab, i)) {
    keys[idx++] = upb_inttable_iter_key(i);
  }
  qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32);

  // Each field is emitted one iteration late so that it knows its successor.
  upb_fhandlers *last_f = NULL;
  uint32_t last_tag = 0;
  for(int i = 0; i < num_keys; i++) {
    uint32_t key = keys[i];
    upb_fhandlers *f = upb_inttable_lookup(&m->fieldtab, key);
    uint32_t tag = upb_vencode32(key);
    if (last_f) upb_decoder_jit_field(d, last_tag, tag, m, last_f, f);
    last_tag = tag;
    last_f = f;
  }
  // The final field has no successor.  last_f is still NULL when the message
  // has no fields at all, in which case there is nothing to emit.
  if (last_f) upb_decoder_jit_field(d, last_tag, 0, m, last_f, NULL);

  free(keys);

  // --------- New code section (does not fall through) ------------------------

  // End-of-buf / end-of-message.
  if (!m->is_group) {
    // This case doesn't exist for groups, because there eob really means
    // eob, so that case just exits the jit directly.
|=>m->jit_endofbuf_pclabel:
|  cmp PTR, DECODER->delim_end
|  jb ->exit_jit  // We are at eob, but not end-of-submsg.
  }

|=>m->jit_endofmsg_pclabel:
  // We are at end-of-submsg: call endmsg handler (if any):
  if (m->endmsg) {
    // void endmsg(void *closure, upb_status *status) {
|  mov ARG1_64, FRAME->closure
|  lea ARG2_64, DECODER->dispatcher.status
|  callp m->endmsg
  }

  if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
    // Counter previous alignment.
|  add rsp, 8
|  ret
  } else if (m->jit_parent_field_done_pclabel == UPB_TOPLEVEL_ONE) {
|  jmp ->exit_jit
  } else {
|  jmp =>m->jit_parent_field_done_pclabel
  }

}
|
|
|
// Format string passed to fprintf by the JIT's unknown-field handler.
static const char *dbgfmt =
    "JIT encountered unknown field! wt=%d, fn=%d\n";
|
|
|
// Emits the whole JIT-ted function for decoder "d": a prologue that saves
// the callee-saved registers and loads the fixed register allocation, the
// code for every message of d's handlers, the shared exit_jit epilogue, and
// the handler at pclabel 0 that reports an unknown field and aborts.
static void upb_decoder_jit(upb_decoder *d) {
|  push rbp
|  mov rbp, rsp
|  push r15
|  push r14
|  push r13
|  push r12
|  push rbx
  // Align stack.
|  sub rsp, 8
|  mov DECODER, ARG1_64
|  mov FRAME, DECODER:ARG1_64->dispatcher.top
|  lea STRREF, DECODER:ARG1_64->strref
|  mov CLOSURE, FRAME->closure
|  mov PTR, DECODER->ptr

  upb_handlers *h = d->dispatcher.handlers;
  if (h->msgs[0]->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
|  call =>h->msgs[0]->jit_startmsg_pclabel
|  jmp ->exit_jit
  }

  // TODO: push return addresses for re-entry (will be necessary for multiple
  // buffer support).
  for (int i = 0; i < h->msgs_len; i++) upb_decoder_jit_msg(d, h->msgs[i]);

|->exit_jit:
|  mov DECODER->ptr, PTR
  // exit_jit can be reached from inside messages that were entered with
  // "call", where rsp is lower than it was after the prologue.  Recompute rsp
  // from rbp (the five saved registers occupy the 40 bytes below it) so the
  // pops always restore the registers that were actually pushed.
|  lea rsp, [rbp - 40]
|  pop rbx
|  pop r12
|  pop r13
|  pop r14
|  pop r15
|  leave
|  ret

  // Unknown-field handler (the dispatch tables point missing fields here).
  // rdx (wire type) and rcx (field number) are still set from dyndispatch
  // and serve as the third and fourth fprintf arguments.
|=>0:
  // Pointers are loaded with mov64 so they are not truncated to 32 bits.
|  mov64 rdi, (uintptr_t)stderr
|  mov64 rsi, (uintptr_t)dbgfmt
  // fprintf is variadic: al must hold the number of vector-register
  // arguments (none), so it is called through r11 rather than via callp,
  // whose indirect form overwrites rax.
|  mov64 r11, (uintptr_t)fprintf
|  xor eax, eax
|  call r11
|  callp abort
}
|
|
|
// Assigns the next three pclabel numbers to field "f" (dispatch entry, entry
// without wire-type check, submessage-done) and advances *pclabel_count.
void upb_decoder_jit_assignfieldlabs(upb_fhandlers *f,
                                     uint32_t *pclabel_count) {
  f->jit_pclabel = (*pclabel_count)++;
  f->jit_pclabel_notypecheck = (*pclabel_count)++;
  f->jit_submsg_done_pclabel = (*pclabel_count)++;
}
|
|
|
// First pass over message "m": assigns its five pclabels, resets its parent
// link to UPB_NONE, assigns labels to every field, records the largest
// field-table key in m->max_field_number and allocates the dispatch array
// with max_field_number + 1 slots.  Aborts if the array cannot be allocated.
//
// NOTE(review): max_field_number is the maximum of the raw fieldtab keys.
// Other code in this file treats those keys as tags (field number << 3 |
// wire type), which would make the array larger than needed -- confirm.
void upb_decoder_jit_assignmsglabs(upb_mhandlers *m, uint32_t *pclabel_count) {
  m->jit_startmsg_pclabel = (*pclabel_count)++;
  m->jit_endofbuf_pclabel = (*pclabel_count)++;
  m->jit_endofmsg_pclabel = (*pclabel_count)++;
  m->jit_dyndispatch_pclabel = (*pclabel_count)++;
  m->jit_unknownfield_pclabel = (*pclabel_count)++;
  m->jit_parent_field_done_pclabel = UPB_NONE;
  m->max_field_number = 0;
  upb_inttable_iter i;
  for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
      i = upb_inttable_next(&m->fieldtab, i)) {
    uint32_t key = upb_inttable_iter_key(i);
    m->max_field_number = UPB_MAX(m->max_field_number, key);
    upb_fhandlers *f = upb_inttable_iter_value(i);
    upb_decoder_jit_assignfieldlabs(f, pclabel_count);
  }
  // XXX: Won't work for large field numbers; will need to use a upb_table.
  // The slot count is computed in size_t so that "+ 1" cannot wrap.
  m->tablearray = malloc(((size_t)m->max_field_number + 1) * sizeof(void*));
  if (!m->tablearray) abort();
}
|
|
|
// Second pass: for messages that have only one parent, link them to the field |
|
// from which they are called. |
|
void upb_decoder_jit_assignmsglabs2(upb_mhandlers *m) { |
|
upb_inttable_iter i; |
|
for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i); |
|
i = upb_inttable_next(&m->fieldtab, i)) { |
|
upb_fhandlers *f = upb_inttable_iter_value(i); |
|
if (upb_issubmsgtype(f->type)) { |
|
upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f); |
|
if (sub_m->jit_parent_field_done_pclabel == UPB_NONE) { |
|
sub_m->jit_parent_field_done_pclabel = f->jit_submsg_done_pclabel; |
|
} else { |
|
sub_m->jit_parent_field_done_pclabel = UPB_MULTIPLE; |
|
} |
|
} |
|
} |
|
} |
|
|
|
// Builds the JIT-ted decoder for "d".
//
// Assigns pclabels to all messages and fields, runs DynASM to generate the
// machine code into a fresh anonymous mapping (stored in d->jit_code, size in
// d->jit_size), registers the code with GDB, fills every message's dispatch
// array with the addresses of its field routines, makes the mapping
// read+execute, and finally dumps the code to /tmp/machine-code.
//
// The function has no way to report an error, so failure to obtain or protect
// the memory the code needs is reported on stderr and aborts.  Failure to
// write the dump file is ignored.
void upb_decoder_makejit(upb_decoder *d) {
  d->debug_info = NULL;

  // Assign pclabels.
  uint32_t pclabel_count = 1;
  upb_handlers *h = d->dispatcher.handlers;
  for (int i = 0; i < h->msgs_len; i++)
    upb_decoder_jit_assignmsglabs(h->msgs[i], &pclabel_count);
  for (int i = 0; i < h->msgs_len; i++)
    upb_decoder_jit_assignmsglabs2(h->msgs[i]);

  if (h->msgs[0]->jit_parent_field_done_pclabel == UPB_NONE) {
    h->msgs[0]->jit_parent_field_done_pclabel = UPB_TOPLEVEL_ONE;
  }

  void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals));
  if (!globals) {
    perror("upb_decoder_makejit: malloc");
    abort();
  }
  dasm_init(d, 1);
  dasm_setupglobal(d, globals, UPB_JIT_GLOBAL__MAX);
  dasm_growpc(d, pclabel_count);
  dasm_setup(d, upb_jit_actionlist);

  upb_decoder_jit(d);

  dasm_link(d, &d->jit_size);

  // Anonymous mappings take fd -1; some systems reject any other value.
  d->jit_code = mmap(NULL, d->jit_size, PROT_READ | PROT_WRITE,
                     MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
  if (d->jit_code == MAP_FAILED) {
    perror("upb_decoder_makejit: mmap");
    abort();
  }

  upb_reg_jit_gdb(d);

  dasm_encode(d, d->jit_code);

  // Create dispatch tables.
  for (int i = 0; i < h->msgs_len; i++) {
    upb_mhandlers *m = h->msgs[i];
    for (uint32_t j = 0; j <= m->max_field_number; j++) {
      // Try all eight wire types for field number j.
      upb_fhandlers *f = NULL;
      for (int k = 0; k < 8; k++) {
        f = upb_inttable_lookup(&m->fieldtab, (j << 3) | k);
        if (f) break;
      }
      if (f) {
        m->tablearray[j] = d->jit_code + dasm_getpclabel(d, f->jit_pclabel);
      } else {
        // Don't handle unknown fields yet.
        m->tablearray[j] = d->jit_code + dasm_getpclabel(d, 0);
      }
    }
  }

  dasm_free(d);
  free(globals);

  if (mprotect(d->jit_code, d->jit_size, PROT_EXEC | PROT_READ) != 0) {
    perror("upb_decoder_makejit: mprotect");
    abort();
  }

  // View with: objdump -M intel -D -b binary -mi386 -Mx86-64 /tmp/machine-code
  // Or: ndisasm -b 64 /tmp/machine-code
  // The dump is a debugging aid only, so a file that cannot be opened is
  // skipped.
  FILE *f = fopen("/tmp/machine-code", "wb");
  if (f) {
    fwrite(d->jit_code, d->jit_size, 1, f);
    fclose(f);
  }
}
|
|
|
// Releases everything upb_decoder_makejit() created for "d": the GDB
// registration (where supported), the code mapping and the debug-info image.
void upb_decoder_freejit(upb_decoder *d) {
#ifndef __APPLE__
  // Unregister from GDB before the symbol file is freed, so the descriptor
  // list never refers to released memory.  The entry is located through the
  // symbol-file address stored at registration time.
  if (d->debug_info) {
    for (gdb_jit_entry *e = __jit_debug_descriptor.first_entry; e;
         e = e->next_entry) {
      if ((const void*)e->symfile_addr != (const void*)d->debug_info) continue;
      if (e->prev_entry) {
        e->prev_entry->next_entry = e->next_entry;
      } else {
        __jit_debug_descriptor.first_entry = e->next_entry;
      }
      if (e->next_entry) e->next_entry->prev_entry = e->prev_entry;
      __jit_debug_descriptor.relevant_entry = e;
      __jit_debug_descriptor.action_flag = GDB_JIT_UNREGISTER;
      __jit_debug_register_code();
      __jit_debug_descriptor.relevant_entry = NULL;
      __jit_debug_descriptor.action_flag = GDB_JIT_NOACTION;
      free(e);
      break;
    }
  }
#endif
  munmap(d->jit_code, d->jit_size);
  free(d->debug_info);
}
|
|
|