Protocol Buffers - Google's data interchange format (grpc依赖) https://developers.google.com/protocol-buffers/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

819 lines
24 KiB

|//
|// upb - a minimalist implementation of protocol buffers.
|//
|// Copyright (c) 2011 Google Inc. See LICENSE for details.
|// Author: Josh Haberman <jhaberman@gmail.com>
|//
|// JIT compiler for upb_decoder on x86. Given a upb_handlers object,
|// generates code specialized to parsing the specific message and
|// calling specific handlers.
|//
|// Since the JIT can call other functions (the JIT'ted code is not a leaf
|// function) we must respect alignment rules. On OS X, this means aligning
|// the stack to 16 bytes.
#define UPB_NONE -1
#define UPB_MULTIPLE -2
#define UPB_TOPLEVEL_ONE -3
#include <sys/mman.h>
#include "dynasm/dasm_proto.h"
#include "dynasm/dasm_x86.h"
#ifndef MAP_ANONYMOUS
# define MAP_ANONYMOUS MAP_ANON
#endif
// We map into the low 32 bits when we can, but if this is not available
// (like on OS X) we take what we can get. It's not required for correctness,
// it's just a performance thing that makes it more likely that our jumps
// can be rel32 (i.e. within 32-bits of our pc) instead of the longer
// sequence required for other jumps (see callp).
#ifndef MAP_32BIT
#define MAP_32BIT 0
#endif
// To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code
// at runtime. GDB 7.x+ has defined an interface for doing this, and these
// structure/function definitions are copied out of gdb/jit.h
//
// We need to give GDB an ELF file at runtime describing the symbols we have
// generated. To avoid implementing the ELF format, we generate an ELF file
// at compile-time and compile it in as a character string. We can replace
// a few key constants (address of JIT-ted function and its size) by looking
// for a few magic numbers and doing a dumb string replacement.
#ifndef __APPLE__
// Template ELF image for the GDB JIT interface. Its bytes come from the
// included header. upb_reg_jit_gdb() copies this array and patches magic
// placeholder values in the copy, so the template itself is never modified.
const unsigned char upb_jit_debug_elf_file[] = {
#include "upb/pb/jit_debug_elf_file.h"
};
// Declarations for the GDB JIT interface. The type layouts, the descriptor
// symbol and the hook function are copied from gdb/jit.h, so their names and
// field order must not change.

// Values stored in gdb_jit_descriptor.action_flag.
typedef enum {
  GDB_JIT_NOACTION = 0,
  GDB_JIT_REGISTER = 1,
  GDB_JIT_UNREGISTER = 2
} jit_actions_t;

// One registered symbol file; entries form a doubly-linked list.
struct gdb_jit_entry {
  struct gdb_jit_entry *next_entry;
  struct gdb_jit_entry *prev_entry;
  const char *symfile_addr;  // Start of the in-memory symbol file.
  uint64_t symfile_size;     // Size of the symbol file in bytes.
};
typedef struct gdb_jit_entry gdb_jit_entry;

// Root object for the list of registered symbol files.
typedef struct {
  uint32_t version;
  uint32_t action_flag;           // One of jit_actions_t.
  gdb_jit_entry *relevant_entry;  // Entry the pending action refers to.
  gdb_jit_entry *first_entry;     // Head of the entry list.
} gdb_jit_descriptor;

gdb_jit_descriptor __jit_debug_descriptor = {
  .version = 1,
  .action_flag = GDB_JIT_NOACTION,
  .relevant_entry = NULL,
  .first_entry = NULL
};

// Hook called after __jit_debug_descriptor has been updated. It is kept
// out-of-line (noinline) and non-empty (the empty asm statement) so the call
// is not optimized away.
void __attribute__((noinline)) __jit_debug_register_code() {
  __asm__ __volatile__("");
}
// Registers the decoder's JIT-ted code with GDB.
//
// Makes a private copy of the ELF template (owned by d->debug_info), patches
// the placeholder words in the copy with the address and size of the
// generated code, links a new entry at the head of the GDB descriptor list
// and invokes the registration hook.
//
// Debugger support is best-effort: if either allocation fails the function
// returns without registering anything. d->debug_info is then either NULL or
// a valid allocation, so it can be passed to free() in both cases.
void upb_reg_jit_gdb(upb_decoder *d) {
  // Create debug info.
  const size_t elf_len = sizeof(upb_jit_debug_elf_file);
  d->debug_info = malloc(elf_len);
  if (!d->debug_info) return;
  memcpy(d->debug_info, upb_jit_debug_elf_file, elf_len);

  // Scan the copy in 64-bit steps and substitute the magic numbers. Each word
  // is moved through a local with memcpy() so the byte buffer is never
  // accessed through a uint64_t lvalue, and no arithmetic is done on void*.
  char *image = (char*)d->debug_info;
  for (size_t ofs = 0; ofs + sizeof(uint64_t) <= elf_len;
       ofs += sizeof(uint64_t)) {
    uint64_t word;
    memcpy(&word, image + ofs, sizeof(word));
    if (word == 0x12345678) {
      word = (uintptr_t)d->jit_code;
    } else if (word == 0x321) {
      // "else if": a word that was just patched must not be matched again.
      word = d->jit_size;
    } else {
      continue;
    }
    memcpy(image + ofs, &word, sizeof(word));
  }

  // Register the JIT-ted code with GDB.
  gdb_jit_entry *e = malloc(sizeof(*e));
  if (!e) return;
  e->next_entry = __jit_debug_descriptor.first_entry;
  e->prev_entry = NULL;
  if (e->next_entry) e->next_entry->prev_entry = e;
  e->symfile_addr = d->debug_info;
  e->symfile_size = elf_len;
  __jit_debug_descriptor.first_entry = e;
  __jit_debug_descriptor.relevant_entry = e;
  __jit_debug_descriptor.action_flag = GDB_JIT_REGISTER;
  __jit_debug_register_code();
}
#else
// Stub used when __APPLE__ is defined: no GDB registration is performed.
void upb_reg_jit_gdb(upb_decoder *d) { (void)d; }
#endif
|// DynASM setup: target architecture, and the names under which DynASM
|// generates the action list and the global-label tables.
|.arch x64
|.actionlist upb_jit_actionlist
|.globals UPB_JIT_GLOBAL_
|.globalnames upb_jit_globalnames
|
|// Calling conventions. Note -- this will need to be changed for
|// Windows, which uses a different calling convention!
|// ARGn_<bits> names the register (or sub-register) that carries the n-th
|// integer argument of a C call at the given operand width.
|.define ARG1_64, rdi
|.define ARG2_8, sil
|.define ARG2_32, esi
|.define ARG2_64, rsi
|.define ARG3_8, dl
|.define ARG3_32, edx
|.define ARG3_64, rdx
|.define ARG4_64, rcx
|.define ARG5_32, r8d
|
|// Register allocation / type map.
|// ALL of the code in this file uses these register allocations.
|// When we "call" within this file, we do not use regular calling
|// conventions, but of course when calling to user callbacks we must.
|// PTR is loaded from DECODER->ptr on entry to the JIT code and written back
|// on exit. FRAME, STRREF and DECODER are typed so that struct fields can be
|// addressed as REG->field. STDARRAY has no default register and is used with
|// an explicit one (STDARRAY:reg->field).
|.define PTR, rbx
|.define CLOSURE, r12
|.type FRAME, upb_dispatcher_frame, r13
|.type STRREF, upb_strref, r14
|.type DECODER, upb_decoder, r15
|.type STDARRAY, upb_stdarray
|
|
|// Emits a call to the C function at "addr". Addresses below 0xffffffff use
|// the direct "call &addr" form; any other address is first loaded into rax
|// with mov64 and called indirectly, so rax is clobbered on that path.
|// NOTE(review): the direct form presumably also needs the target to be
|// within rel32 range of the generated code (see the MAP_32BIT comment near
|// the top of the file) -- confirm for mappings outside the low 4GB.
|.macro callp, addr
|| if ((uintptr_t)addr < 0xffffffff) {
| call &addr
|| } else {
| mov64 rax, (uintptr_t)addr
| call rax
|| }
|.endmacro
|
|
|// Checks PTR for end-of-buffer.
|// If PTR >= DECODER->effective_end, control leaves the field loop: for a
|// group message it jumps straight to exit_jit, otherwise to the message's
|// end-of-buffer label (m->jit_endofbuf_pclabel). Otherwise falls through.
|.macro check_eob, m
| cmp PTR, DECODER->effective_end
|| if (m->is_group) {
| jae ->exit_jit
|| } else {
| jae =>m->jit_endofbuf_pclabel
|| }
|.endmacro
|
|
|// Decodes varint from [PTR + offset] -> ARG3.
|// Saves new pointer as rax.
|// Precondition: ecx already holds the dword at [PTR + offset] (see
|// decode_varint). One- and two-byte varints are decoded inline from cl/ch;
|// esi is used as scratch for the second byte. Longer varints are handed to
|// upb_vdecode_max8_fast with ARG1 = pointer past the first two bytes and
|// ARG2 = the 14 bits decoded so far; a zero rax on return exits the JIT.
|// NOTE(review): the slow path presumably leaves the decoded value in rdx
|// (ARG3) as a second return register -- confirm against
|// upb_vdecode_max8_fast.
|// Uses local label 9 as the common exit.
|.macro decode_loaded_varint, offset
| // Check for <=2 bytes inline, otherwise jump to 2-10 byte decoder.
| lea rax, [PTR + offset + 1]
| mov ARG3_32, ecx
| and ARG3_32, 0x7f
| test cl, cl
| jns >9
| lea rax, [PTR + offset + 2]
| movzx esi, ch
| and esi, 0x7f
| shl esi, 7
| or ARG3_32, esi
| test cx, cx
| jns >9
| mov ARG1_64, rax
| mov ARG2_32, ARG3_32
| callp upb_vdecode_max8_fast
| test rax, rax
| jz ->exit_jit // >10-byte varint.
|9:
|.endmacro
|
|
|// Loads the dword at [PTR + offset] into ecx, decodes it as a varint into
|// ARG3 via decode_loaded_varint, and advances PTR past the varint.
|.macro decode_varint, offset
| mov ecx, dword [PTR + offset]
| decode_loaded_varint offset
| mov PTR, rax
|.endmacro
|
|
|// Decode the tag -> edx.
|// Could specialize this by avoiding the value masking: could just key the
|// table on the raw (length-masked) varint to save 3-4 cycles of latency.
|// Currently only support tables where all entries are in the array part.
|//
|// Expects ecx to hold the bytes at PTR. After decoding, ecx holds the tag
|// shifted right by 3 (the table index) and edx the low three bits (the wire
|// type). An index above m->max_field_number exits the JIT; otherwise control
|// jumps through m->tablearray[index]. PTR itself is not advanced here; the
|// field code skips the tag.
|.macro dyndispatch_, m
|=>m->jit_dyndispatch_pclabel:
| decode_loaded_varint, 0
| mov ecx, edx
| shr ecx, 3
| and edx, 0x7
| cmp ecx, m->max_field_number // Bounds-check the field.
| ja ->exit_jit // In the future; could be unknown label
|| if ((uintptr_t)m->tablearray < 0xffffffff) {
| mov rax, qword [rcx*8 + m->tablearray] // TODO: support hybrid array/hash tables.
|| } else {
| mov64 rax, (uintptr_t)m->tablearray
| mov rax, qword [rax + rcx*8]
|| }
| jmp rax // Dispatch: unpredictable jump.
|.endmacro
|
|
|// Selects how "dyndispatch" is realized: either as an alias of dyndispatch_
|// (the dispatch sequence is emitted at every use) or as a jump to the
|// message's single jit_dyndispatch_pclabel copy. The first is enabled.
|.if 1
| // Replicated dispatch: larger code, but better branch prediction.
| .define dyndispatch, dyndispatch_
|.else
| .macro dyndispatch, m
| jmp =>m->jit_dyndispatch_pclabel
| .endmacro
|.endif
|
|
|// Push a stack frame (not the CPU stack, the upb_decoder stack).
|// Computes the address of the next frame in rax and exits the JIT if it
|// would reach DECODER->dispatcher.limit. Otherwise fills in the new frame's
|// f, end_ofs and is_sequence fields from the macro arguments, publishes it as
|// dispatcher.top and makes it the current FRAME. The closure field is NOT
|// set here; callers store it afterwards.
|.macro pushframe, f, end_offset_, is_sequence_
| lea rax, [FRAME + sizeof(upb_dispatcher_frame)] // rax for shorter addressing.
| cmp rax, qword DECODER->dispatcher.limit
| jae ->exit_jit // Frame stack overflow.
| mov qword FRAME:rax->f, f
| mov dword FRAME:rax->end_ofs, end_offset_
| mov byte FRAME:rax->is_sequence, is_sequence_
| mov DECODER->dispatcher.top, rax
| mov FRAME, rax
|.endmacro
|
|
|// Pops the top decoder frame: steps FRAME back by one frame, publishes it
|// as dispatcher.top, recomputes the end-of-message pointers for message "m"
|// (setmsgend) and reloads CLOSURE from the now-current frame.
|// Clobbers rax and rsi via setmsgend.
|.macro popframe, m
| sub FRAME, sizeof(upb_dispatcher_frame)
| mov DECODER->dispatcher.top, FRAME
| setmsgend m
| mov CLOSURE, FRAME->closure
|.endmacro
|
|
|// Recomputes DECODER->delim_end and DECODER->effective_end for the current
|// FRAME of message "m". Clobbers rax and rsi.
|//   group:     delim_end = all-ones, effective_end = jit_end
|//   otherwise: delim_end = buf + FRAME->end_ofs,
|//              effective_end = min(delim_end, jit_end)
|.macro setmsgend, m
| mov rsi, DECODER->jit_end
|| if (m->is_group) {
| mov64 rax, 0xffffffffffffffff
| mov qword DECODER->delim_end, rax
| mov DECODER->effective_end, rsi
|| } else {
| // Could store a correctly-biased version in the frame, at the cost of
| // a larger stack.
| mov eax, dword FRAME->end_ofs
| add rax, qword DECODER->buf
| mov DECODER->delim_end, rax // delim_end = d->buf + f->end_ofs
| cmp rax, rsi
| jb >8
| mov rax, rsi // effective_end = min(d->delim_end, d->jit_end)
|8:
| mov DECODER->effective_end, rax
|| }
|.endmacro
|
|
|// rcx contains the raw bytes at PTR (the next tag). Compare them against
|// "tag", but since it is a varint we must only compare as many bytes as
|// actually have data. Only the CPU flags are produced, for a following
|// je/jne. The 3- and 5-byte cases mask rcx in place, and the 5-byte case
|// also clobbers rdx. Aborts at code-generation time for tags longer than
|// 5 bytes.
|.macro checktag, tag
|| switch (upb_value_size(tag)) {
|| case 1:
| cmp cl, tag
|| break;
|| case 2:
| cmp cx, tag
|| break;
|| case 3:
| and ecx, 0xffffff // 3 bytes
| cmp rcx, tag
|| break;  // Previously fell through and emitted a redundant second cmp.
|| case 4:
| cmp ecx, tag
|| break;
|| case 5:
| mov64 rdx, 0xffffffffff // 5 bytes
| and rcx, rdx
| cmp rcx, tag
|| break;
|| default: abort();
|| }
|.endmacro
|
|
|// Loads the field's bound value (f->fval) into ARG2 for a handler call.
|// The encoding is chosen by value: xor for zero, a 32-bit mov when the value
|// is below 0xffffffff, and a full mov64 otherwise.
|// When NDEBUG is not defined, f->fval.type is additionally loaded into ARG3.
|.macro loadfval, f
||#ifndef NDEBUG
||// Since upb_value carries type information in debug mode
||// only, we need to pass the arguments slightly differently.
| mov ARG3_32, f->fval.type
||#endif
|| if (f->fval.val.uint64 == 0) {
| xor ARG2_32, ARG2_32
|| } else if (f->fval.val.uint64 < 0xffffffff) {
| mov ARG2_32, f->fval.val.uint64
|| } else {
| mov64 ARG2_64, f->fval.val.uint64
|| }
|.endmacro
|
|
|// Sets bit "hasbit" in the bit array that starts at the address in "reg".
|// Emits nothing when hasbit is negative.
|.macro sethas, reg, hasbit
|| if (hasbit >= 0) {
| or byte [reg + (hasbit / 8)], (1 << (hasbit % 8))
|| }
|.endmacro
#include <stdlib.h>
#include "upb/pb/varint.h"
#include "upb/msg.h"
// Decodes the next val into ARG3, advances PTR.
//
// Emits the decode sequence for one field of type "type". The emitted code
// expects PTR to point at the start of the tag; "tag_size" is the number of
// bytes to skip for the tag. Afterwards the value is in ARG3 and PTR has been
// advanced past the tag and the value, with these exceptions:
//   - STRING/BYTES: ARG3 holds the STRREF pointer; if the string does not end
//     within effective_end the JIT is exited without advancing PTR.
//   - MESSAGE: ARG3 holds the length prefix; PTR is just past the prefix.
//   - GROUP: no value; PTR is just past the tag.
//   - UPB_TYPE_ENDGROUP: skips the tag and jumps to m's end-of-message label.
// Aborts (at code-generation time) on an unrecognized type.
static void upb_decoder_jit_decodefield(upb_decoder *d, upb_mhandlers *m,
uint8_t type, size_t tag_size) {
// Decode the value into arg 3 for the callback.
switch (type) {
// 8-byte fixed-width values: loaded directly from the buffer.
case UPB_TYPE(DOUBLE):
case UPB_TYPE(FIXED64):
case UPB_TYPE(SFIXED64):
| mov ARG3_64, qword [PTR + tag_size]
| add PTR, 8 + tag_size
break;
// 4-byte fixed-width values.
case UPB_TYPE(FLOAT):
case UPB_TYPE(FIXED32):
case UPB_TYPE(SFIXED32):
| mov ARG3_32, dword [PTR + tag_size]
| add PTR, 4 + tag_size
break;
case UPB_TYPE(BOOL):
// Can't assume it's one byte long, because bool must be wire-compatible
// with all of the varint integer types.
| decode_varint tag_size
| test ARG3_64, ARG3_64
| setne ARG3_8 // Other bytes left with val, should be ok.
break;
// Plain varints: the decoded value is used as-is.
case UPB_TYPE(INT64):
case UPB_TYPE(UINT64):
case UPB_TYPE(INT32):
case UPB_TYPE(UINT32):
case UPB_TYPE(ENUM):
| decode_varint tag_size
break;
case UPB_TYPE(SINT64):
// 64-bit zig-zag decoding.
// Computes (v >> 1) ^ -(v & 1).
| decode_varint tag_size
| mov rax, ARG3_64
| shr ARG3_64, 1
| and rax, 1
| neg rax
| xor ARG3_64, rax
break;
case UPB_TYPE(SINT32):
// 32-bit zig-zag decoding.
| decode_varint tag_size
| mov eax, ARG3_32
| shr ARG3_32, 1
| and eax, 1
| neg eax
| xor ARG3_32, eax
break;
case UPB_TYPE(STRING):
case UPB_TYPE(BYTES):
// We only handle the case where the entire string is in our current
// buf, which sidesteps any security problems. The C path has more
// robust checks.
// rax = start of the string data, rdi = one past its end. STRREF is
// filled in before the bounds check; PTR only moves if the check passes.
| mov ecx, dword [PTR + tag_size]
| decode_loaded_varint tag_size
| mov rdi, rax
| add rdi, ARG3_64
| mov STRREF->len, ARG3_32
| mov STRREF->ptr, rax
| sub rax, DECODER->buf
| add eax, DECODER->bufstart_ofs // = d->ptr - d->buf + d->bufstart_ofs
| mov STRREF->stream_offset, eax
| mov ARG3_64, STRREF
| cmp rdi, DECODER->effective_end
| ja ->exit_jit // Can't deliver, whole string not in buf.
| mov PTR, rdi
break;
case UPB_TYPE_ENDGROUP: // A pseudo-type.
| add PTR, tag_size
| jmp =>m->jit_endofmsg_pclabel
return;
// Will dispatch callbacks and call submessage in a second.
case UPB_TYPE(MESSAGE):
| decode_varint tag_size
break;
case UPB_TYPE(GROUP):
| add PTR, tag_size
break;
default: abort();
}
}
#if 0
// These appear not to speed things up, but keeping around for
// further experimentation.
//
// Emits an inline append of ARG3 to the upb_stdarray addressed by ARG1.
// "size" is the element width in bytes (8, 4 or 1). When the array is full
// (len == size) the emitted code falls back to calling f->value; otherwise it
// stores the element at index len and increments len.
static void upb_decoder_jit_doappend(upb_decoder *d, uint8_t size,
                                     upb_fhandlers *f) {
| mov eax, STDARRAY:ARG1_64->len
| cmp eax, STDARRAY:ARG1_64->size
| jne >2
  // If array is full, fall back to actual function.
| loadfval f
| callp f->value
| jmp >3
|2:
| mov rcx, STDARRAY:ARG1_64->ptr
| mov esi, eax
| add eax, 1
  switch (size) {
    case 8:
| mov [rcx + rsi * 8], ARG3_64
      break;
    case 4:
| mov [rcx + rsi * 4], ARG3_32
      break;
    case 1:
      // One-byte elements are indexed without scaling. The previous "* 4"
      // scale wrote every element four bytes apart.
| mov [rcx + rsi], ARG3_8
      break;
  }
| mov STDARRAY:ARG1_64->len, eax
|3:
}
#endif
// Emits the code that delivers a decoded field to its handlers.
//
// Submessage types (MESSAGE/GROUP): pushes a decoder frame whose end offset
// is either (PTR - buf) + length (MESSAGE, length in ARG3) or
// UPB_NONDELIMITED (GROUP), calls the optional startsubmsg handler, transfers
// control to the submessage's parsing code, and at f->jit_submsg_done_pclabel
// calls the optional endsubmsg handler and pops the frame.
//
// All other types: for the recognized upb_stdmsg_set* handlers the value in
// ARG3 is stored directly at fd->offset from the closure; otherwise f->value
// is called. Afterwards the field's has-bit is set if f->valuehasbit >= 0.
static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
// Call callbacks.
if (upb_issubmsgtype(f->type)) {
if (f->type == UPB_TYPE(MESSAGE)) {
| mov rsi, PTR
| sub rsi, DECODER->buf
| add esi, ARG3_32 // = (d->ptr - d->buf) + delim_len
} else {
assert(f->type == UPB_TYPE(GROUP));
| mov esi, UPB_NONDELIMITED
}
| pushframe f, esi, false
// Call startsubmsg handler (if any).
if (f->startsubmsg) {
// upb_sflow_t startsubmsg(void *closure, upb_value fval)
// NOTE(review): the new closure is taken from rdx, presumably the second
// word of the returned upb_sflow_t -- confirm against its definition.
| mov ARG1_64, CLOSURE
| loadfval f
| callp f->startsubmsg
| mov CLOSURE, rdx
}
| mov qword FRAME->closure, CLOSURE
const upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f);
// A submessage type reached from a single field is entered with a jmp and
// returns by jumping to that field's "done" label; one reached from several
// fields is entered with a call and returns with ret.
if (sub_m->jit_parent_field_done_pclabel != UPB_MULTIPLE) {
| jmp =>sub_m->jit_startmsg_pclabel;
} else {
| call =>sub_m->jit_startmsg_pclabel;
}
|=>f->jit_submsg_done_pclabel:
// Call endsubmsg handler (if any).
if (f->endsubmsg) {
// upb_flow_t endsubmsg(void *closure, upb_value fval);
| mov ARG1_64, CLOSURE
| loadfval f
| callp f->endsubmsg
}
| popframe upb_fhandlers_getmsg(f)
} else {
| mov ARG1_64, CLOSURE
// Test for callbacks we can specialize.
// Can't switch() on function pointers.
if (f->value == &upb_stdmsg_setint64 ||
f->value == &upb_stdmsg_setuint64 ||
f->value == &upb_stdmsg_setptr ||
f->value == &upb_stdmsg_setdouble) {
const upb_fielddef *fd = upb_value_getfielddef(f->fval);
| mov [ARG1_64 + fd->offset], ARG3_64
} else if (f->value == &upb_stdmsg_setint32 ||
f->value == &upb_stdmsg_setuint32 ||
f->value == &upb_stdmsg_setfloat) {
const upb_fielddef *fd = upb_value_getfielddef(f->fval);
| mov [ARG1_64 + fd->offset], ARG3_32
} else if (f->value == &upb_stdmsg_setbool) {
const upb_fielddef *fd = upb_value_getfielddef(f->fval);
| mov [ARG1_64 + fd->offset], ARG3_8
#if 0
// These appear not to speed things up, but keeping around for
// further experimentation.
} else if (f->value == &upb_stdmsg_setint64_r ||
f->value == &upb_stdmsg_setuint64_r ||
f->value == &upb_stdmsg_setptr_r ||
f->value == &upb_stdmsg_setdouble_r) {
upb_decoder_jit_doappend(d, 8, f);
} else if (f->value == &upb_stdmsg_setint32_r ||
f->value == &upb_stdmsg_setuint32_r ||
f->value == &upb_stdmsg_setfloat_r) {
upb_decoder_jit_doappend(d, 4, f);
} else if (f->value == &upb_stdmsg_setbool_r) {
upb_decoder_jit_doappend(d, 1, f);
#endif
} else {
// Load closure and fval into arg registers.
||#ifndef NDEBUG
||// Since upb_value carries type information in debug mode
||// only, we need to pass the arguments slightly differently.
| mov ARG4_64, ARG3_64
| mov ARG5_32, upb_types[f->type].inmemory_type
||#endif
| loadfval f
| callp f->value
}
| sethas CLOSURE, f->valuehasbit
}
// TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
}
// PTR should point to the beginning of the tag.
//
// Emits the complete handling code for field "f" of message "m":
//   - the dispatch-table entry (f->jit_pclabel), which verifies the wire type
//     in edx against the low three bits of "tag";
//   - a second entry (f->jit_pclabel_notypecheck) used when the tag has
//     already been matched exactly;
//   - for repeated fields, a sequence frame with startseq/endseq handlers
//     around a loop that keeps consuming values while the next tag equals
//     "tag";
//   - an epilogue that checks for end-of-buffer, loads the next tag into rcx,
//     tries the statically predicted "next_tag" (jumping straight to next_f)
//     and otherwise falls back to dynamic dispatch.
// "tag"/"next_tag" are varint-encoded tags; next_tag == 0 means there is no
// prediction (next_f is then unused).
static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_tag,
                                  upb_mhandlers *m,
                                  upb_fhandlers *f, upb_fhandlers *next_f) {
  // PC-label for the dispatch table.
  // We check the wire type (which must be loaded in edx) because the
  // table is keyed on field number, not type.
|=>f->jit_pclabel:
| cmp edx, (tag & 0x7)
| jne ->exit_jit // In the future: could be an unknown field or packed.
|=>f->jit_pclabel_notypecheck:
  if (f->repeated) {
    // The sequence frame inherits the end offset of the enclosing frame.
| mov esi, FRAME->end_ofs
| pushframe f, esi, true
    if (f->startseq) {
| mov ARG1_64, CLOSURE
| loadfval f
| callp f->startseq
| mov CLOSURE, rdx
    }
| mov qword FRAME->closure, CLOSURE
  }
|1: // Label for repeating this field.
  upb_decoder_jit_decodefield(d, m, f->type, upb_value_size(tag));
  upb_decoder_jit_callcb(d, f);
  // Epilogue: load next tag, check for repeated field.
| check_eob m
| mov rcx, qword [PTR]
  if (f->repeated) {
| checktag tag
| je <1
    if (f->endseq) {
| mov ARG1_64, CLOSURE
| loadfval f
| callp f->endseq
    }
| popframe m
    // Reload the next tag. rcx is a caller-saved register, so the endseq call
    // above may have clobbered it, and checktag may have masked it in place.
    // The address was already read above and PTR has not moved since.
| mov rcx, qword [PTR]
  }
  if (next_tag != 0) {
| checktag next_tag
| je =>next_f->jit_pclabel_notypecheck
  }
  // Fall back to dynamic dispatch.
| dyndispatch m
|1:
}
// qsort() comparator ordering uint32_t values ascending.
// Returns a negative, zero or positive value when *a is less than, equal to
// or greater than *b. The result is built from two comparisons rather than a
// subtraction: an unsigned difference converted to int has the wrong sign
// whenever the operands are more than INT_MAX apart.
static int upb_compare_uint32(const void *a, const void *b) {
  // TODO: always put ENDGROUP at the end.
  const uint32_t lhs = *(const uint32_t*)a;
  const uint32_t rhs = *(const uint32_t*)b;
  return (lhs > rhs) - (lhs < rhs);
}
// Emits all code for one message type "m":
//   1. the entry point (m->jit_startmsg_pclabel): optional startmsg handler,
//      end-of-message setup and the initial dynamic dispatch;
//   2. one code section per field, emitted in ascending key order so that
//      each field can predict the following one;
//   3. the end-of-buffer / end-of-message section: optional endmsg handler
//      and the return to whoever entered the message (ret, jump to the
//      parent field's "done" label, or exit_jit for a single-use top-level
//      message).
// A message without fields gets no field sections. Aborts if the temporary
// key array cannot be allocated.
static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) {
|=>m->jit_startmsg_pclabel:
  if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
    // There was a call to get here, so we need to align the stack.
| sub rsp, 8
  }
  // Call startmsg handler (if any):
  if (m->startmsg) {
    // upb_flow_t startmsg(void *closure);
| mov ARG1_64, FRAME->closure
| callp m->startmsg
    // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
  }
| setmsgend m
| check_eob m
| mov ecx, dword [PTR]
| dyndispatch_ m
  // --------- New code section (does not fall through) ------------------------
  // Emit code for parsing each field (dynamic dispatch contains pointers to
  // all of these).
  // Create an ordering over the fields (inttable ordering is undefined).
  int num_keys = upb_inttable_count(&m->fieldtab);
  uint32_t *keys = NULL;
  if (num_keys > 0) {
    keys = malloc(num_keys * sizeof(*keys));
    if (!keys) abort();  // Cannot generate code without the key ordering.
  }
  int idx = 0;
  for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
      i = upb_inttable_next(&m->fieldtab, i)) {
    keys[idx++] = upb_inttable_iter_key(i);
  }
  if (num_keys > 0) {
    qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32);
  }
  // Each field is emitted one iteration late so that it knows its successor.
  upb_fhandlers *last_f = NULL;
  uint32_t last_tag = 0;
  for(int i = 0; i < num_keys; i++) {
    uint32_t key = keys[i];
    upb_fhandlers *f = upb_inttable_lookup(&m->fieldtab, key);
    uint32_t tag = upb_vencode32(key);
    if (last_f) upb_decoder_jit_field(d, last_tag, tag, m, last_f, f);
    last_tag = tag;
    last_f = f;
  }
  // The final field has no successor to predict. Skipped for a message with
  // no fields, where last_f is still NULL and must not be dereferenced.
  if (last_f) upb_decoder_jit_field(d, last_tag, 0, m, last_f, NULL);
  free(keys);
  // --------- New code section (does not fall through) ------------------------
  // End-of-buf / end-of-message.
  if (!m->is_group) {
    // This case doesn't exist for groups, because there eob really means
    // eob, so that case just exits the jit directly.
|=>m->jit_endofbuf_pclabel:
| cmp PTR, DECODER->delim_end
| jb ->exit_jit // We are at eob, but not end-of-submsg.
  }
|=>m->jit_endofmsg_pclabel:
  // We are at end-of-submsg: call endmsg handler (if any):
  if (m->endmsg) {
    // void endmsg(void *closure, upb_status *status) {
| mov ARG1_64, FRAME->closure
| lea ARG2_64, DECODER->dispatcher.status
| callp m->endmsg
  }
  if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
    // Counter previous alignment.
| add rsp, 8
| ret
  } else if (m->jit_parent_field_done_pclabel == UPB_TOPLEVEL_ONE) {
| jmp ->exit_jit
  } else {
| jmp =>m->jit_parent_field_done_pclabel
  }
}
// Format string passed to fprintf by the unknown-field stub emitted at the
// end of upb_decoder_jit().
static const char *dbgfmt =
"JIT encountered unknown field! wt=%d, fn=%d\n";
// Emits the whole JIT program for decoder "d":
//   - a prologue that saves rbp and the callee-saved registers used by the
//     register map, aligns the stack, and loads DECODER, FRAME, STRREF,
//     CLOSURE and PTR from the decoder passed in ARG1;
//   - the code for every message type in the handlers;
//   - the shared exit_jit epilogue, which writes PTR back to DECODER->ptr,
//     restores the saved registers and returns;
//   - pc-label 0, the unknown-field stub, which prints dbgfmt to stderr and
//     aborts (upb_decoder_makejit points empty dispatch slots at it).
static void upb_decoder_jit(upb_decoder *d) {
| push rbp
| mov rbp, rsp
| push r15
| push r14
| push r13
| push r12
| push rbx
// Align stack.
| sub rsp, 8
| mov DECODER, ARG1_64
| mov FRAME, DECODER:ARG1_64->dispatcher.top
| lea STRREF, DECODER:ARG1_64->strref
| mov CLOSURE, FRAME->closure
| mov PTR, DECODER->ptr
upb_handlers *h = d->dispatcher.handlers;
// A top-level message that is also used as a submessage from several fields
// finishes with "ret", so it has to be entered with "call" here. Otherwise
// execution simply falls through into the first message's code below.
if (h->msgs[0]->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
| call =>h->msgs[0]->jit_startmsg_pclabel
| jmp ->exit_jit
}
// TODO: push return addresses for re-entry (will be necessary for multiple
// buffer support).
for (int i = 0; i < h->msgs_len; i++) upb_decoder_jit_msg(d, h->msgs[i]);
|->exit_jit:
| mov DECODER->ptr, PTR
// Counter previous alignment.
| add rsp, 8
| pop rbx
| pop r12
| pop r13
| pop r14
| pop r15
| leave
| ret
// Unknown-field stub. edx (wire type) and ecx (field number) are still live
// from the dispatch sequence and line up with fprintf's 3rd and 4th
// arguments.
|=>0:
| mov rdi, stderr
| mov rsi, dbgfmt
| callp fprintf
| callp abort
}
// Reserves three consecutive pc-labels for field "f" (dispatch entry, entry
// without the wire-type check, and submessage-done) and advances
// *pclabel_count past them.
void upb_decoder_jit_assignfieldlabs(upb_fhandlers *f,
                                     uint32_t *pclabel_count) {
  uint32_t next_label = *pclabel_count;
  f->jit_pclabel = next_label++;
  f->jit_pclabel_notypecheck = next_label++;
  f->jit_submsg_done_pclabel = next_label++;
  *pclabel_count = next_label;
}
// First pass over message "m": reserves its five pc-labels, resets the
// parent link to UPB_NONE, assigns labels to every field, records the largest
// key of the field table in m->max_field_number and allocates the dispatch
// array with max_field_number + 1 slots. *pclabel_count is advanced past
// every label handed out.
void upb_decoder_jit_assignmsglabs(upb_mhandlers *m, uint32_t *pclabel_count) {
  uint32_t next_label = *pclabel_count;
  m->jit_startmsg_pclabel = next_label++;
  m->jit_endofbuf_pclabel = next_label++;
  m->jit_endofmsg_pclabel = next_label++;
  m->jit_dyndispatch_pclabel = next_label++;
  m->jit_unknownfield_pclabel = next_label++;
  // Publish the counter before the field labels are assigned through it.
  *pclabel_count = next_label;

  m->jit_parent_field_done_pclabel = UPB_NONE;
  m->max_field_number = 0;

  upb_inttable_iter it = upb_inttable_begin(&m->fieldtab);
  while (!upb_inttable_done(it)) {
    uint32_t key = upb_inttable_iter_key(it);
    m->max_field_number = UPB_MAX(m->max_field_number, key);
    upb_fhandlers *f = upb_inttable_iter_value(it);
    upb_decoder_jit_assignfieldlabs(f, pclabel_count);
    it = upb_inttable_next(&m->fieldtab, it);
  }

  // XXX: Won't work for large field numbers; will need to use a upb_table.
  m->tablearray = malloc((m->max_field_number + 1) * sizeof(void*));
}
// Second pass: links every submessage type to the field it is entered from.
// The first referencing field found becomes the submessage's parent (its
// "done" label is where the submessage code jumps back to); any further
// reference marks the submessage UPB_MULTIPLE.
void upb_decoder_jit_assignmsglabs2(upb_mhandlers *m) {
  upb_inttable_iter it = upb_inttable_begin(&m->fieldtab);
  for (; !upb_inttable_done(it); it = upb_inttable_next(&m->fieldtab, it)) {
    upb_fhandlers *f = upb_inttable_iter_value(it);
    if (!upb_issubmsgtype(f->type)) continue;

    upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f);
    if (sub_m->jit_parent_field_done_pclabel != UPB_NONE) {
      // Already claimed by another field.
      sub_m->jit_parent_field_done_pclabel = UPB_MULTIPLE;
      continue;
    }
    sub_m->jit_parent_field_done_pclabel = f->jit_submsg_done_pclabel;
  }
}
// Builds the JIT-ted decoder for "d":
//   1. assigns pc-labels to every message and field and works out how each
//      message is entered;
//   2. runs DynASM over upb_decoder_jit() and links the result;
//   3. maps writable memory, registers the code with GDB, encodes the
//      machine code into the mapping;
//   4. fills every message's dispatch array with absolute code addresses
//      (slots without a field point at pc-label 0, the unknown-field stub);
//   5. switches the mapping to read+execute and dumps the code to
//      /tmp/machine-code for inspection.
// On return d->jit_code/d->jit_size describe the generated code.
// Failures to allocate, map or protect memory are fatal: a diagnostic is
// printed and the process aborts, since no usable code exists at that point.
// Failure to open the debug dump file is ignored.
void upb_decoder_makejit(upb_decoder *d) {
  d->debug_info = NULL;

  // Assign pclabels.
  uint32_t pclabel_count = 1;
  upb_handlers *h = d->dispatcher.handlers;
  for (int i = 0; i < h->msgs_len; i++)
    upb_decoder_jit_assignmsglabs(h->msgs[i], &pclabel_count);
  for (int i = 0; i < h->msgs_len; i++)
    upb_decoder_jit_assignmsglabs2(h->msgs[i]);
  if (h->msgs[0]->jit_parent_field_done_pclabel == UPB_NONE) {
    h->msgs[0]->jit_parent_field_done_pclabel = UPB_TOPLEVEL_ONE;
  }

  void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals));
  if (!globals) {
    perror("upb jit: malloc");
    abort();
  }
  dasm_init(d, 1);
  dasm_setupglobal(d, globals, UPB_JIT_GLOBAL__MAX);
  dasm_growpc(d, pclabel_count);
  dasm_setup(d, upb_jit_actionlist);

  upb_decoder_jit(d);

  dasm_link(d, &d->jit_size);

  // Anonymous mappings must be requested with fd == -1 to be portable.
  d->jit_code = mmap(NULL, d->jit_size, PROT_READ | PROT_WRITE,
                     MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
  if (d->jit_code == MAP_FAILED) {
    perror("upb jit: mmap");
    abort();
  }

  upb_reg_jit_gdb(d);

  dasm_encode(d, d->jit_code);

  // Create dispatch tables.
  for (int i = 0; i < h->msgs_len; i++) {
    upb_mhandlers *m = h->msgs[i];
    for (uint32_t j = 0; j <= m->max_field_number; j++) {
      // The field table is keyed with the wire type in the low three bits,
      // so probe all eight candidates for index j.
      upb_fhandlers *f = NULL;
      for (int k = 0; k < 8; k++) {
        f = upb_inttable_lookup(&m->fieldtab, (j << 3) | k);
        if (f) break;
      }
      if (f) {
        m->tablearray[j] = d->jit_code + dasm_getpclabel(d, f->jit_pclabel);
      } else {
        // Don't handle unknown fields yet.
        m->tablearray[j] = d->jit_code + dasm_getpclabel(d, 0);
      }
    }
  }

  dasm_free(d);
  free(globals);

  if (mprotect(d->jit_code, d->jit_size, PROT_EXEC | PROT_READ) != 0) {
    perror("upb jit: mprotect");
    abort();
  }

  // View with: objdump -M intel -D -b binary -mi386 -Mx86-64 /tmp/machine-code
  // Or: ndisasm -b 64 /tmp/machine-code
  // The dump is a debugging aid only, so it is skipped if the file cannot be
  // opened.
  FILE *f = fopen("/tmp/machine-code", "wb");
  if (f) {
    fwrite(d->jit_code, d->jit_size, 1, f);
    fclose(f);
  }
}
// Releases what upb_decoder_makejit() created for "d": the debug-info copy
// and the mapping that holds the generated code.
void upb_decoder_freejit(upb_decoder *d) {
  // TODO: unregister
  free(d->debug_info);
  munmap(d->jit_code, d->jit_size);
}