|
|
|
|//
|
|
|
|
|// upb - a minimalist implementation of protocol buffers.
|
|
|
|
|//
|
|
|
|
|// Copyright (c) 2011-2013 Google Inc. See LICENSE for details.
|
|
|
|
|// Author: Josh Haberman <jhaberman@gmail.com>
|
|
|
|
|//
|
|
|
|
|// JIT compiler for upb_pbdecoder on x86-64. Generates machine code from the
|
|
|
|
|// bytecode generated in compile_decoder.c.
|
|
|
|
|
|
|
|
|
|.arch x64
|
|
|
|
|.actionlist upb_jit_actionlist
|
|
|
|
|.globals UPB_JIT_GLOBAL_
|
|
|
|
|.globalnames upb_jit_globalnames
|
|
|
|
|
|
|
|
|
|// Calling conventions: the registers that carry arguments when JIT code calls
|// out to C functions. Each name encodes the argument position and the operand
|// width in bits. Note -- this will need to be changed for Windows, which uses
|// a different calling convention!
|.define ARG1_64,   rdi
|.define ARG2_8,    r6b  // DynASM's equivalent to "sil" -- low byte of esi.
|.define ARG2_32,   esi
|.define ARG2_64,   rsi
|.define ARG3_8,    dl
|.define ARG3_32,   edx
|.define ARG3_64,   rdx
|.define ARG4_64,   rcx
|.define ARG5_64,   r8
|.define XMMARG1,   xmm0
|
|
|
|
|
|
|
|
|
|// Register allocation / type map.
|//
|// Every routine in this file shares these assignments. Calls between routines
|// in this file do not follow the regular calling convention; calls out to
|// user callbacks of course do.
|//
|// "unsynced" means the register may be ahead of the copy in memory; see
|// commit_regs / load_regs for how the two are reconciled.
|.define PTR,       rbx  // DECODER->ptr (unsynced)
|.define DATAEND,   r12  // DECODER->data_end (unsynced)
|.define CLOSURE,   r13  // FRAME->sink.closure (unsynced)
|.type   FRAME,     upb_pbdecoder_frame, r14  // DECODER->top (unsynced)
|.type   DECODER,   upb_pbdecoder, r15  // DECODER (immutable)
|.define DELIMEND,  rbp  // FRAME->end_ofs expressed as a buffer pointer (unsynced)
|
|
|
|
|
|
|
|
|
|// commit_regs: spills the unsynced registers back into the decoder and the
|// current frame.
|//
|// Side effect: DELIMEND is converted in place from a buffer pointer to a
|// stream offset (ptr - buf + bufstart_ofs) before it is stored in
|// FRAME->end_ofs, so the register no longer holds a pointer afterwards.
|// load_regs performs the inverse conversion.
|.macro commit_regs
|  mov   DECODER->top, FRAME
|  mov   DECODER->ptr, PTR
|  mov   DECODER->data_end, DATAEND
|  // We don't guarantee that delim_end is NULL when out of range like the
|  // interpreter does.
|  mov   DECODER->delim_end, DELIMEND
|  sub   DELIMEND, DECODER->buf
|  add   DELIMEND, DECODER->bufstart_ofs
|  mov   FRAME->end_ofs, DELIMEND
|  mov   FRAME->sink.closure, CLOSURE
|.endmacro
|
|
|
|
|
|
|
|
|
|// load_regs: reloads the unsynced registers from the decoder and its top
|// frame.
|//
|// FRAME is loaded first because CLOSURE and DELIMEND are read through it.
|// DELIMEND is rebuilt as a buffer pointer from the frame's stream offset:
|// end_ofs - bufstart_ofs + buf.
|.macro load_regs
|  mov   FRAME, DECODER->top
|  mov   PTR, DECODER->ptr
|  mov   DATAEND, DECODER->data_end
|  mov   CLOSURE, FRAME->sink.closure
|  mov   DELIMEND, FRAME->end_ofs
|  sub   DELIMEND, DECODER->bufstart_ofs
|  add   DELIMEND, DECODER->buf
|.endmacro
|
|
|
|
|
|
|
|
|
|// callp: calls the external C function at address "addr".
|//
|// The address is materialized in rax (so rax is clobbered before the call).
|// r12 is saved on the stack and then borrowed to remember the unaligned rsp
|// across the call; both are restored afterwards, which means the sequence
|// depends on the callee handing r12 back unchanged.
|.macro callp, addr
|  mov64 rax, (uintptr_t)addr
|
|  // Stack must be 16-byte aligned (x86-64 ABI requires this).
|  //
|  // OPT: possibly remove this by statically ensuring correct alignment.
|  //
|  // OPT: use "call rel32" where possible.
|  push  r12
|  mov   r12, rsp
|  and   rsp, 0xfffffffffffffff0UL  // Align stack.
|  call  rax
|  mov   rsp, r12
|  pop   r12
|.endmacro
|
|
|
|
|
|
|
|
|
|// ld64: loads the codegen-time constant "val" into the second argument
|// register (ARG2_64 / ARG2_32). The instruction is chosen when the code is
|// generated: a full 64-bit move when the value does not fit in 32 bits, a
|// 32-bit move for other non-zero values, and an xor for zero.
|.macro ld64, val
|| {
||   uintptr_t v = (uintptr_t)val;
||   if (v > 0xffffffff) {
|      mov64 ARG2_64, v
||   } else if (v) {
|      mov   ARG2_32, v
||   } else {
|      xor   ARG2_32, ARG2_32
||   }
|| }
|.endmacro
|
|
|
|
|
|
|
|
|
|// load_handler_data: looks up the handler data for selector "arg" in handlers
|// "h" while generating code, and loads the result into the second argument
|// register via ld64.
|.macro load_handler_data, h, arg
|  ld64  upb_handlers_gethandlerdata(h, arg)
|.endmacro
|
|
|
|
|
|
|
|
|
|// chkeob ("check end of buffer"): branches to "target" when the buffer cannot
|// supply "bytes" more bytes.
|//
|// "bytes" is examined when the code is generated. For a single byte the test
|// is simply PTR == DATAEND; otherwise the remaining length (DATAEND - PTR) is
|// computed in rcx, which is clobbered, and compared unsigned against "bytes".
|.macro chkeob, bytes, target
|| if (bytes == 1) {
|    cmp   PTR, DATAEND
|    je    target
|| } else {
|    mov   rcx, DATAEND
|    sub   rcx, PTR
|    cmp   rcx, bytes
|    jb    target
|| }
|.endmacro
|
|
|
|
|
|
|
|
|
|// chkneob ("check NOT end of buffer"): the inverse of chkeob. Branches to
|// "target" when at least "bytes" bytes are available between PTR and DATAEND.
|//
|// As in chkeob, the single-byte case compares the pointers directly and the
|// general case clobbers rcx with the remaining length.
|.macro chkneob, bytes, target
|| if (bytes == 1) {
|    cmp   PTR, DATAEND
|    jne   target
|| } else {
|    mov   rcx, DATAEND
|    sub   rcx, PTR
|    cmp   rcx, bytes
|    jae   target
|| }
|.endmacro
|
|
|
|
|
|
|
|
|// sethas: when "hasbit" is non-negative, emits an OR that sets bit
|// (hasbit % 8) of the byte at [reg + hasbit / 8]. A negative "hasbit" emits
|// nothing.
|.macro sethas, reg, hasbit
|| if (hasbit >= 0) {
|    or    byte [reg + ((uint32_t)hasbit / 8)], (1 << ((uint32_t)hasbit % 8))
|| }
|.endmacro
|
|
|
|
|
|
|
|
|
|// dv32: decodes a 32-bit varint into rdx, inlining the one-byte case.
|//
|// If the buffer is exhausted, or the first byte has its continuation bit set,
|// control goes through ->decodev32_fallback. Both paths meet at label 8,
|// where PTR is advanced by one byte. Uses local labels 7 and 8.
|.macro dv32
|  chkeob 1, >7
|  movzx edx, byte [PTR]
|  test  dl, dl
|  jns   >8
|7:
|  call  ->decodev32_fallback
|8:
|  add   PTR, 1
|.endmacro
|
|
|
|
|
|
|
|
// Result code that ->checktag_fallback leaves in eax when, after a resume, PTR
// has reached DELIMEND instead of another tag; jittag() branches on it.
// Parenthesized so the negative literal stays a single operand wherever the
// macro is expanded.
#define DECODE_EOF (-3)
|
|
|
|
|
|
|
|
// Returns the handler registered in "h" for selector "sel". A NULL "h" is
// accepted and yields NULL without consulting the handlers API.
static upb_func *gethandler(const upb_handlers *h, upb_selector_t sel) {
  if (!h) {
    return NULL;
  }
  return upb_handlers_gethandler(h, sel);
}
|
|
|
|
|
|
|
|
// Defines an "assembly label" for the current code generation offset.
|
|
|
|
// This label exists *purely* for debugging purposes: it is emitted into
|
|
|
|
// the .so, and printed as part of JIT debugging output when UPB_JIT_LOAD_SO is
|
|
|
|
// defined.
|
|
|
|
//
|
|
|
|
// We would define this in the .c file except that it conditionally defines a
|
|
|
|
// pclabel.
|
|
|
|
static void asmlabel(jitcompiler *jc, const char *fmt, ...) {
|
|
|
|
#ifndef NDEBUG
|
|
|
|
int ofs = jc->dynasm->section->ofs;
|
|
|
|
assert(ofs != jc->lastlabelofs);
|
|
|
|
jc->lastlabelofs = ofs;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef UPB_JIT_LOAD_SO
|
|
|
|
UPB_UNUSED(jc);
|
|
|
|
UPB_UNUSED(fmt);
|
|
|
|
#else
|
|
|
|
va_list args;
|
|
|
|
va_start(args, fmt);
|
|
|
|
char *str = upb_vasprintf(fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
|
|
|
|
int pclabel = alloc_pclabel(jc);
|
|
|
|
// Normally we would prefer to allocate this inline with the codegen,
|
|
|
|
// ie.
|
|
|
|
// |=>asmlabel(...)
|
|
|
|
// But since we do this conditionally, only when UPB_JIT_LOAD_SO is defined,
|
|
|
|
// we do it here instead.
|
|
|
|
|=>pclabel:
|
|
|
|
upb_inttable_insert(&jc->asmlabels, pclabel, upb_value_ptr(str));
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
// Should only be called when the associated handler is known to exist.
|
|
|
|
static bool alwaysok(const upb_handlers *h, upb_selector_t sel) {
|
|
|
|
upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
|
|
|
|
bool ok = upb_handlers_getattr(h, sel, &attr);
|
|
|
|
UPB_ASSERT_VAR(ok, ok);
|
|
|
|
bool ret = upb_handlerattr_alwaysok(&attr);
|
|
|
|
upb_handlerattr_uninit(&attr);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Emit static assembly routines; code that does not vary based on the message
|
|
|
|
// schema. Since it's not input-dependent, we only need one single copy of it.
|
|
|
|
// For the moment we generate a single copy per generated handlers. Eventually
|
|
|
|
// we should generate this code at compile time and link it into the binary so
|
|
|
|
// we have one copy total. To do that we'll want to be sure that it is within
|
|
|
|
// 2GB of our JIT code, so that branches between the two are near (rel32).
|
|
|
|
//
|
|
|
|
// We'd put this assembly in a .s file directly, but DynASM's ability to
|
|
|
|
// calculate structure offsets automatically is too useful to pass up (it's way
|
|
|
|
// more convenient to write DECODER->sink than [rbx + 0x96], especially since
|
|
|
|
// the latter would have to be changed whenever the structure is updated).
|
|
|
|
static void emit_static_asm(jitcompiler *jc) {
|
|
|
|
| // Trampolines for entering/exiting the JIT. These are a bit tricky to
|
|
|
|
| // support full resuming; when we suspend we copy the JIT's portion of
|
|
|
|
| // the call stack into the upb_pbdecoder and restore it when we resume.
|
|
|
|
asmlabel(jc, "enterjit");
|
|
|
|
|->enterjit:
|
|
|
|
|1:
|
|
|
|
| push rbp
|
|
|
|
| push r15
|
|
|
|
| push r14
|
|
|
|
| push r13
|
|
|
|
| push r12
|
|
|
|
| push rbx
|
|
|
|
|
|
|
|
|
| mov rbx, ARG2_64 // Preserve JIT method.
|
|
|
|
|
|
|
|
|
| mov DECODER, rdi
|
|
|
|
| callp upb_pbdecoder_resume // Same args as us; reuse regs.
|
|
|
|
| test eax, eax
|
|
|
|
| jns >1
|
|
|
|
| mov DECODER->saved_rsp, rsp
|
|
|
|
| mov rax, rbx
|
|
|
|
| load_regs
|
|
|
|
|
|
|
|
|
| // Test whether we have a saved stack to resume.
|
|
|
|
| mov ARG3_64, DECODER->call_len
|
|
|
|
| test ARG3_64, ARG3_64
|
|
|
|
| jnz >2
|
|
|
|
|
|
|
|
|
| call rax
|
|
|
|
|
|
|
|
|
| mov rax, DECODER->size_param
|
|
|
|
| mov qword DECODER->call_len, 0
|
|
|
|
|1:
|
|
|
|
| pop rbx
|
|
|
|
| pop r12
|
|
|
|
| pop r13
|
|
|
|
| pop r14
|
|
|
|
| pop r15
|
|
|
|
| pop rbp
|
|
|
|
| ret
|
|
|
|
|
|
|
|
|
|2:
|
|
|
|
| // Resume decoder.
|
|
|
|
| lea ARG2_64, DECODER->callstack
|
|
|
|
| sub rsp, ARG3_64
|
|
|
|
| mov ARG1_64, rsp
|
|
|
|
| callp memcpy // Restore stack.
|
|
|
|
| ret // Return to resumed function (not ->enterjit caller).
|
|
|
|
|
|
|
|
|
| // Other code can call this to suspend the JIT.
|
|
|
|
| // To the calling code, it will appear that the function returns when
|
|
|
|
| // the JIT resumes, and more buffer space will be available.
|
|
|
|
| // Args: eax=the value that decode() should return.
|
|
|
|
asmlabel(jc, "exitjit");
|
|
|
|
|->exitjit:
|
|
|
|
| // Save the stack into DECODER->callstack.
|
|
|
|
| lea ARG1_64, DECODER->callstack
|
|
|
|
| mov ARG2_64, rsp
|
|
|
|
| mov ARG3_64, DECODER->saved_rsp
|
|
|
|
| sub ARG3_64, rsp
|
|
|
|
| mov DECODER->call_len, ARG3_64 // Preserve len for next resume.
|
|
|
|
| mov ebx, eax // Preserve return value across memcpy.
|
|
|
|
| callp memcpy // Copy stack into decoder.
|
|
|
|
| mov eax, ebx // This will be our return value.
|
|
|
|
|
|
|
|
|
| // Must NOT do this before the memcpy(), otherwise memcpy() will
|
|
|
|
| // clobber the stack we are trying to save!
|
|
|
|
| mov rsp, DECODER->saved_rsp
|
|
|
|
| pop rbx
|
|
|
|
| pop r12
|
|
|
|
| pop r13
|
|
|
|
| pop r14
|
|
|
|
| pop r15
|
|
|
|
| pop rbp
|
|
|
|
| ret
|
|
|
|
|
|
|
|
|
| // Like suspend() in the C decoder, except that the function appears
|
|
|
|
| // (from the caller's perspective) not to return until the decoder is
|
|
|
|
| // resumed.
|
|
|
|
asmlabel(jc, "suspend");
|
|
|
|
|->suspend:
|
|
|
|
| cmp DECODER->ptr, PTR
|
|
|
|
| je >1
|
|
|
|
| mov DECODER->checkpoint, PTR
|
|
|
|
|1:
|
|
|
|
| commit_regs
|
|
|
|
| mov rdi, DECODER
|
|
|
|
| callp upb_pbdecoder_suspend
|
|
|
|
| jmp ->exitjit
|
|
|
|
|
|
|
|
|
asmlabel(jc, "pushlendelim");
|
|
|
|
|->pushlendelim:
|
|
|
|
|1:
|
|
|
|
| mov FRAME->sink.closure, CLOSURE
|
|
|
|
| mov DECODER->checkpoint, PTR
|
|
|
|
| dv32
|
|
|
|
| mov rcx, DELIMEND
|
|
|
|
| sub rcx, PTR
|
|
|
|
| sub rcx, rdx
|
|
|
|
| jb ->err // Len is greater than enclosing message.
|
|
|
|
| mov FRAME->end_ofs, rcx
|
|
|
|
| add FRAME, sizeof(upb_pbdecoder_frame)
|
|
|
|
| mov DELIMEND, PTR
|
|
|
|
| add DELIMEND, rdx
|
|
|
|
| cmp FRAME, DECODER->limit
|
|
|
|
| je >3 // Stack overflow
|
|
|
|
| mov dword FRAME->groupnum, 0
|
|
|
|
| test rcx, rcx
|
|
|
|
| jz >2
|
|
|
|
| mov DATAEND, DECODER->end
|
|
|
|
| cmp PTR, DELIMEND
|
|
|
|
| ja >2
|
|
|
|
| cmp DELIMEND, DATAEND
|
|
|
|
| ja >2
|
|
|
|
| mov DATAEND, DELIMEND // If DELIMEND >= PTR && DELIMEND < DATAEND
|
|
|
|
|2:
|
|
|
|
| ret
|
|
|
|
|3:
|
|
|
|
| // Error -- call seterr.
|
|
|
|
| mov PTR, DECODER->checkpoint // Rollback to before the delim len.
|
|
|
|
| // Prepare seterr args.
|
|
|
|
| mov ARG1_64, DECODER
|
|
|
|
| ld64 kPbDecoderStackOverflow
|
|
|
|
| callp upb_pbdecoder_seterr
|
|
|
|
| call ->suspend
|
|
|
|
| jmp <1
|
|
|
|
|
|
|
|
|
| // For getting a value that spans a buffer seam. Falls back to C.
|
|
|
|
| // Args: rdi=C decoding function (prototype: int f(upb_pbdecoder*, void*))
|
|
|
|
asmlabel(jc, "getvalue_slow");
|
|
|
|
|->getvalue_slow:
|
|
|
|
| sub rsp, 16 // Stack is [8-byte value, 8-byte func pointer]
|
|
|
|
| mov [rsp + 8], rdi // Need to preserve fptr across suspends.
|
|
|
|
|1:
|
|
|
|
| mov qword [rsp], 0 // For parsing routines that only parse 32 bits.
|
|
|
|
| mov ARG1_64, DECODER
|
|
|
|
| mov ARG2_64, rsp
|
|
|
|
| mov DECODER->checkpoint, PTR
|
|
|
|
| commit_regs
|
|
|
|
| call aword [rsp + 8]
|
|
|
|
| load_regs
|
|
|
|
| test eax, eax
|
|
|
|
| jns >2
|
|
|
|
| // Success; return parsed data (in rdx AND xmm0).
|
|
|
|
| mov rdx, [rsp]
|
|
|
|
| movsd xmm0, qword [rsp]
|
|
|
|
| add rsp, 16
|
|
|
|
| ret
|
|
|
|
|2:
|
|
|
|
| call ->exitjit // Return eax from decode function.
|
|
|
|
| jmp <1
|
|
|
|
|
|
|
|
|
asmlabel(jc, "parse_unknown");
|
|
|
|
| // Args: edx=fieldnum, cl=wire type
|
|
|
|
|->parse_unknown:
|
|
|
|
| // OPT: handle directly instead of kicking to C.
|
|
|
|
| // Check for ENDGROUP.
|
|
|
|
| mov ARG1_64, DECODER
|
|
|
|
| mov ARG2_32, edx
|
|
|
|
| movzx ARG3_32, cl
|
|
|
|
| commit_regs
|
|
|
|
| callp upb_pbdecoder_skipunknown
|
|
|
|
| load_regs
|
|
|
|
| cmp eax, DECODE_ENDGROUP
|
|
|
|
| jne >1
|
|
|
|
| ret // Return eax=DECODE_ENDGROUP, not zero
|
|
|
|
|1:
|
|
|
|
| cmp eax, DECODE_OK
|
|
|
|
| je >1
|
|
|
|
| call ->exitjit // Return eax from decode function.
|
|
|
|
|1:
|
|
|
|
| xor eax, eax
|
|
|
|
| ret
|
|
|
|
|
|
|
|
|
| // Fallback functions for parsing single values. These are used when the
|
|
|
|
| // buffer doesn't contain enough remaining data for the fast path. Each
|
|
|
|
| // primitive type (v32, v64, f32, f64) has two functions: decode & skip.
|
|
|
|
| // Decode functions return their value in rsi/esi.
|
|
|
|
| //
|
|
|
|
| // These functions leave PTR = value_end - fast_path_bytes, so that we can
|
|
|
|
| // re-join the fast path which will add fast_path_bytes after the callback
|
|
|
|
| // completes. We also set DECODER->ptr to this value which is a signal to
|
|
|
|
| // ->suspend that DECODER->checkpoint is up to date.
|
|
|
|
asmlabel(jc, "skip_decode_f32_fallback");
|
|
|
|
|->skipf32_fallback:
|
|
|
|
|->decodef32_fallback:
|
|
|
|
| mov64 rdi, (uintptr_t)upb_pbdecoder_decode_f32
|
|
|
|
| call ->getvalue_slow
|
|
|
|
| sub PTR, 4
|
|
|
|
| mov DECODER->ptr, PTR
|
|
|
|
| ret
|
|
|
|
|
|
|
|
|
asmlabel(jc, "skip_decode_f64_fallback");
|
|
|
|
|->skipf64_fallback:
|
|
|
|
|->decodef64_fallback:
|
|
|
|
| mov64 rdi, (uintptr_t)upb_pbdecoder_decode_f64
|
|
|
|
| call ->getvalue_slow
|
|
|
|
| sub PTR, 8
|
|
|
|
| mov DECODER->ptr, PTR
|
|
|
|
| ret
|
|
|
|
|
|
|
|
|
| // Called for varint >= 1 byte.
|
|
|
|
asmlabel(jc, "skip_decode_v32_fallback");
|
|
|
|
|->skipv32_fallback:
|
|
|
|
|->skipv64_fallback:
|
|
|
|
| chkeob 16, >1
|
|
|
|
| // With at least 16 bytes left, we can do a branch-less SSE version.
|
|
|
|
| movdqu xmm0, [PTR]
|
|
|
|
| pmovmskb eax, xmm0 // bits 0-15 are continuation bits, 16-31 are 0.
|
|
|
|
| not eax
|
|
|
|
| bsf eax, eax
|
|
|
|
| cmp al, 10
|
|
|
|
| jae ->decode_varint_slow // Error (>10 byte varint).
|
|
|
|
| add PTR, rax // bsf result is 0-based, so PTR=end-1, as desired.
|
|
|
|
| ret
|
|
|
|
|
|
|
|
|
|1:
|
|
|
|
| // With fewer than 16 bytes, we have to read byte by byte.
|
|
|
|
| lea rcx, [PTR + 10]
|
|
|
|
| mov rax, PTR // Preserve PTR in case of fallback to slow path.
|
|
|
|
| cmp rcx, DATAEND
|
|
|
|
| cmova rcx, DATAEND // rcx = MIN(DATAEND, PTR + 10)
|
|
|
|
|2:
|
|
|
|
| cmp rax, rcx
|
|
|
|
| je ->decode_varint_slow
|
|
|
|
| test byte [rax], 0x80
|
|
|
|
| jz >3
|
|
|
|
| add rax, 1
|
|
|
|
| jmp <2
|
|
|
|
|3:
|
|
|
|
| mov PTR, rax // PTR = varint_end - 1, as desired
|
|
|
|
| ret
|
|
|
|
|
|
|
|
|
| // Returns tag in edx
|
|
|
|
asmlabel(jc, "decode_unknown_tag_fallback");
|
|
|
|
|->decode_unknown_tag_fallback:
|
|
|
|
| sub rsp, 16
|
|
|
|
|1:
|
|
|
|
| cmp PTR, DELIMEND
|
|
|
|
| jne >2
|
|
|
|
| add rsp, 16
|
|
|
|
| xor eax, eax
|
|
|
|
| ret
|
|
|
|
|2:
|
|
|
|
| // OPT: Have a medium-fast path before falling back to _slow.
|
|
|
|
| mov ARG1_64, DECODER
|
|
|
|
| mov ARG2_64, rsp
|
|
|
|
| commit_regs
|
|
|
|
| callp upb_pbdecoder_decode_varint_slow
|
|
|
|
| load_regs
|
|
|
|
| cmp eax, 0
|
|
|
|
| jge >3
|
|
|
|
| mov edx, [rsp] // Success; return parsed data.
|
|
|
|
| add rsp, 16
|
|
|
|
| ret
|
|
|
|
|3:
|
|
|
|
| call ->exitjit // Return eax from decode function.
|
|
|
|
| jmp <1
|
|
|
|
|
|
|
|
|
| // Called for varint >= 1 byte.
|
|
|
|
asmlabel(jc, "decode_v32_v64_fallback");
|
|
|
|
|->decodev32_fallback:
|
|
|
|
|->decodev64_fallback:
|
|
|
|
| chkeob 10, ->decode_varint_slow
|
|
|
|
| // OPT: do something faster than just calling the C version.
|
|
|
|
| mov rdi, PTR
|
|
|
|
| callp upb_vdecode_fast
|
|
|
|
| test rax, rax
|
|
|
|
| je ->decode_varint_slow // Unterminated varint.
|
|
|
|
| mov PTR, rax
|
|
|
|
| sub PTR, 1
|
|
|
|
| mov DECODER->ptr, PTR
|
|
|
|
| ret
|
|
|
|
|
|
|
|
|
asmlabel(jc, "decode_varint_slow");
|
|
|
|
|->decode_varint_slow:
|
|
|
|
| // Slow path: end of buffer or error (varint length >= 10).
|
|
|
|
| mov64 rdi, (uintptr_t)upb_pbdecoder_decode_varint_slow
|
|
|
|
| call ->getvalue_slow
|
|
|
|
| sub PTR, 1
|
|
|
|
| mov DECODER->ptr, PTR
|
|
|
|
| ret
|
|
|
|
|
|
|
|
|
| // Args: rsi=expected tag, return=rax (DECODE_{OK,MISMATCH})
|
|
|
|
asmlabel(jc, "checktag_fallback");
|
|
|
|
|->checktag_fallback:
|
|
|
|
| sub rsp, 8
|
|
|
|
| mov [rsp], rsi // Preserve expected tag.
|
|
|
|
|1:
|
|
|
|
| mov ARG1_64, DECODER
|
|
|
|
| commit_regs
|
|
|
|
| mov DECODER->checkpoint, PTR
|
|
|
|
| callp upb_pbdecoder_checktag_slow
|
|
|
|
| load_regs
|
|
|
|
| cmp eax, 0
|
|
|
|
| jge >2
|
|
|
|
| add rsp, 8
|
|
|
|
| ret
|
|
|
|
|2:
|
|
|
|
| call ->exitjit
|
|
|
|
| mov rsi, [rsp]
|
|
|
|
| cmp PTR, DELIMEND
|
|
|
|
| jne <1
|
|
|
|
| mov eax, DECODE_EOF
|
|
|
|
| add rsp, 8
|
|
|
|
| ret
|
|
|
|
|
|
|
|
|
| // Args: rsi=upb_inttable, rdx=key, return=rax (-1 if not found).
|
|
|
|
| // Preserves: rcx, rdx
|
|
|
|
| // OPT: Could write this in assembly if it's a hotspot.
|
|
|
|
asmlabel(jc, "hashlookup");
|
|
|
|
|->hashlookup:
|
|
|
|
| push rcx
|
|
|
|
| push rdx
|
|
|
|
| sub rsp, 16
|
|
|
|
| mov rdi, rsi
|
|
|
|
| mov rsi, rdx
|
|
|
|
| mov rdx, rsp
|
|
|
|
| callp upb_inttable_lookup
|
|
|
|
| add rsp, 16
|
|
|
|
| pop rdx
|
|
|
|
| pop rcx
|
|
|
|
| test al, al
|
|
|
|
| jz >2 // Unknown field.
|
|
|
|
| mov rax, [rsp-32] // Value from table.
|
|
|
|
| ret
|
|
|
|
|2:
|
|
|
|
| xor rax, rax
|
|
|
|
| not rax
|
|
|
|
| ret
|
|
|
|
}
|
|
|
|
|
|
|
|
static void jitprimitive(jitcompiler *jc, opcode op,
|
|
|
|
const upb_handlers *h, upb_selector_t sel) {
|
|
|
|
typedef enum { V32, V64, F32, F64, X } valtype_t;
|
|
|
|
static valtype_t types[] = {
|
|
|
|
X, F64, F32, V64, V64, V32, F64, F32, V64, X, X, X, X, V32, V32, F32, F64,
|
|
|
|
V32, V64 };
|
|
|
|
static char fastpath_bytes[] = { 1, 1, 4, 8 };
|
|
|
|
const valtype_t type = types[op];
|
|
|
|
const int fastbytes = fastpath_bytes[type];
|
|
|
|
upb_func *handler = gethandler(h, sel);
|
|
|
|
|
|
|
|
if (handler) {
|
|
|
|
|1:
|
|
|
|
| chkneob fastbytes, >3
|
|
|
|
|2:
|
|
|
|
switch (type) {
|
|
|
|
case V32:
|
|
|
|
| call ->decodev32_fallback
|
|
|
|
break;
|
|
|
|
case V64:
|
|
|
|
| call ->decodev64_fallback
|
|
|
|
break;
|
|
|
|
case F32:
|
|
|
|
| call ->decodef32_fallback
|
|
|
|
break;
|
|
|
|
case F64:
|
|
|
|
| call ->decodef64_fallback
|
|
|
|
break;
|
|
|
|
case X: break;
|
|
|
|
}
|
|
|
|
| jmp >4
|
|
|
|
|
|
|
|
// Fast path decode; for when check_bytes bytes are available.
|
|
|
|
|3:
|
|
|
|
switch (op) {
|
|
|
|
case OP_PARSE_SFIXED32:
|
|
|
|
case OP_PARSE_FIXED32:
|
|
|
|
| mov edx, dword [PTR]
|
|
|
|
break;
|
|
|
|
case OP_PARSE_SFIXED64:
|
|
|
|
case OP_PARSE_FIXED64:
|
|
|
|
| mov rdx, qword [PTR]
|
|
|
|
break;
|
|
|
|
case OP_PARSE_FLOAT:
|
|
|
|
| movss xmm0, dword [PTR]
|
|
|
|
break;
|
|
|
|
case OP_PARSE_DOUBLE:
|
|
|
|
| movsd xmm0, qword [PTR]
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
// Inline one byte of varint decoding.
|
|
|
|
| movzx edx, byte [PTR]
|
|
|
|
| test dl, dl
|
|
|
|
| js <2 // Fallback to slow path for >1 byte varint.
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Second-stage decode; used for both fast and slow paths
|
|
|
|
// (only needed for a few types).
|
|
|
|
|4:
|
|
|
|
switch (op) {
|
|
|
|
case OP_PARSE_SINT32:
|
|
|
|
// 32-bit zig-zag decode.
|
|
|
|
| mov eax, edx
|
|
|
|
| shr edx, 1
|
|
|
|
| and eax, 1
|
|
|
|
| neg eax
|
|
|
|
| xor edx, eax
|
|
|
|
break;
|
|
|
|
case OP_PARSE_SINT64:
|
|
|
|
// 64-bit zig-zag decode.
|
|
|
|
| mov rax, rdx
|
|
|
|
| shr rdx, 1
|
|
|
|
| and rax, 1
|
|
|
|
| neg rax
|
|
|
|
| xor rdx, rax
|
|
|
|
break;
|
|
|
|
case OP_PARSE_BOOL:
|
|
|
|
| test rdx, rdx
|
|
|
|
| setne dl
|
|
|
|
break;
|
|
|
|
default: break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Call callback (or specialize if we can).
|
|
|
|
upb_fieldtype_t type;
|
|
|
|
const upb_shim_data *data = upb_shim_getdata(h, sel, &type);
|
|
|
|
if (data) {
|
|
|
|
switch (type) {
|
|
|
|
case UPB_TYPE_INT64:
|
|
|
|
case UPB_TYPE_UINT64:
|
|
|
|
| mov [CLOSURE + data->offset], rdx
|
|
|
|
break;
|
|
|
|
case UPB_TYPE_INT32:
|
|
|
|
case UPB_TYPE_UINT32:
|
|
|
|
case UPB_TYPE_ENUM:
|
|
|
|
| mov [CLOSURE + data->offset], edx
|
|
|
|
break;
|
|
|
|
case UPB_TYPE_DOUBLE:
|
|
|
|
| movsd qword [CLOSURE + data->offset], XMMARG1
|
|
|
|
break;
|
|
|
|
case UPB_TYPE_FLOAT:
|
|
|
|
| movss dword [CLOSURE + data->offset], XMMARG1
|
|
|
|
break;
|
|
|
|
case UPB_TYPE_BOOL:
|
|
|
|
| mov [CLOSURE + data->offset], dl
|
|
|
|
break;
|
|
|
|
case UPB_TYPE_STRING:
|
|
|
|
case UPB_TYPE_BYTES:
|
|
|
|
case UPB_TYPE_MESSAGE:
|
|
|
|
assert(false); break;
|
|
|
|
}
|
|
|
|
| sethas CLOSURE, data->hasbit
|
|
|
|
} else if (handler) {
|
|
|
|
| mov ARG1_64, CLOSURE
|
|
|
|
| load_handler_data h, sel
|
|
|
|
| callp handler
|
|
|
|
if (!alwaysok(h, sel)) {
|
|
|
|
| test al, al
|
|
|
|
| jnz >5
|
|
|
|
| call ->suspend
|
|
|
|
| jmp <1
|
|
|
|
|5:
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// We do this last so that the checkpoint is not advanced past the user's
|
|
|
|
// data until the callback has returned success.
|
|
|
|
| add PTR, fastbytes
|
|
|
|
} else {
|
|
|
|
// No handler registered for this value, just skip it.
|
|
|
|
| chkneob fastbytes, >3
|
|
|
|
|2:
|
|
|
|
switch (type) {
|
|
|
|
case V32:
|
|
|
|
| call ->skipv32_fallback
|
|
|
|
break;
|
|
|
|
case V64:
|
|
|
|
| call ->skipv64_fallback
|
|
|
|
break;
|
|
|
|
case F32:
|
|
|
|
| call ->skipf32_fallback
|
|
|
|
break;
|
|
|
|
case F64:
|
|
|
|
| call ->skipf64_fallback
|
|
|
|
break;
|
|
|
|
case X: break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Fast-path skip.
|
|
|
|
|3:
|
|
|
|
if (type == V32 || type == V64) {
|
|
|
|
| test byte [PTR], 0x80
|
|
|
|
| jnz <2
|
|
|
|
}
|
|
|
|
| add PTR, fastbytes
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void jitdispatch(jitcompiler *jc,
|
|
|
|
const upb_pbdecodermethod *method) {
|
|
|
|
// Lots of room for tweaking/optimization here.
|
|
|
|
|
|
|
|
const upb_inttable *dispatch = &method->dispatch;
|
|
|
|
bool has_hash_entries = (dispatch->t.count > 0);
|
|
|
|
|
|
|
|
// Whether any of the fields for this message can have two wire types which
|
|
|
|
// are both valid (packed & non-packed).
|
|
|
|
//
|
|
|
|
// OPT: populate this more precisely; not all messages with hash entries have
|
|
|
|
// this characteristic.
|
|
|
|
bool has_multi_wiretype = has_hash_entries;
|
|
|
|
|
|
|
|
|=>define_jmptarget(jc, &method->dispatch):
|
|
|
|
|1:
|
|
|
|
// Decode the field tag.
|
|
|
|
| mov aword DECODER->checkpoint, PTR
|
|
|
|
| chkeob 2, >6
|
|
|
|
| movzx edx, byte [PTR]
|
|
|
|
| test dl, dl
|
|
|
|
| jns >7 // Jump if first byte has no continuation bit.
|
|
|
|
| movzx ecx, byte [PTR + 1]
|
|
|
|
| test cl, cl
|
|
|
|
| js >6 // Jump if second byte has continuation bit.
|
|
|
|
| // Confirmed two-byte varint.
|
|
|
|
| shl ecx, 7
|
|
|
|
| and edx, 0x7f
|
|
|
|
| or edx, ecx
|
|
|
|
| add PTR, 2
|
|
|
|
| jmp >8
|
|
|
|
|6:
|
|
|
|
| call ->decode_unknown_tag_fallback
|
|
|
|
| test eax, eax // Hit DELIMEND?
|
|
|
|
| jnz >8
|
|
|
|
| ret
|
|
|
|
|7:
|
|
|
|
| add PTR, 1
|
|
|
|
|8:
|
|
|
|
| mov ecx, edx
|
|
|
|
| shr edx, 3
|
|
|
|
| and cl, 7
|
|
|
|
|
|
|
|
// See comment attached to upb_pbdecodermethod.dispatch for layout of the
|
|
|
|
// dispatch table.
|
|
|
|
|2:
|
|
|
|
| cmp edx, dispatch->array_size
|
|
|
|
if (has_hash_entries) {
|
|
|
|
| jae >7
|
|
|
|
} else {
|
|
|
|
| jae >5
|
|
|
|
}
|
|
|
|
| // OPT: Compact the lookup arr into 32-bit entries.
|
|
|
|
if ((uintptr_t)dispatch->array > 0x7fffffff) {
|
|
|
|
| mov64 rax, (uintptr_t)dispatch->array
|
|
|
|
| mov rax, qword [rax + rdx * 8]
|
|
|
|
} else {
|
|
|
|
| mov rax, qword [rdx * 8 + dispatch->array]
|
|
|
|
}
|
|
|
|
|3:
|
|
|
|
| // We take advantage of the fact that non-present entries are stored
|
|
|
|
| // as -1, which will result in wire types that will never match.
|
|
|
|
| cmp al, cl
|
|
|
|
if (has_multi_wiretype) {
|
|
|
|
| jne >6
|
|
|
|
} else {
|
|
|
|
| jne >5
|
|
|
|
}
|
|
|
|
| shr rax, 16
|
|
|
|
|
|
|
|
|
| // Load the machine code address from the table entry.
|
|
|
|
| // The table entry is relative to the dispatch->array jmptarget
|
|
|
|
| // (patchdispatch() took care of this) which is the same as
|
|
|
|
| // local label "4". The "lea" is really just trying to do
|
|
|
|
| // lea rax, [>4 + rax]
|
|
|
|
| //
|
|
|
|
| // But we can't write that directly for some reason, so we use
|
|
|
|
| // rdx as a temporary.
|
|
|
|
| lea rdx, [>4]
|
|
|
|
|=>define_jmptarget(jc, dispatch->array):
|
|
|
|
|4:
|
|
|
|
| add rax, rdx
|
|
|
|
| ret
|
|
|
|
|
|
|
|
|
|5:
|
|
|
|
| // Field isn't in our table.
|
|
|
|
| call ->parse_unknown
|
|
|
|
| test eax, eax // ENDGROUP?
|
|
|
|
| jz <1
|
|
|
|
| lea rax, [>9] // ENDGROUP; Load address of OP_ENDMSG.
|
|
|
|
| ret
|
|
|
|
|
|
|
|
if (has_multi_wiretype) {
|
|
|
|
|6:
|
|
|
|
| // Primary wire type didn't match, check secondary wire type.
|
|
|
|
| cmp ah, cl
|
|
|
|
| jne <5
|
|
|
|
| // Secondary wire type is a match, look up fn + UPB_MAX_FIELDNUMBER.
|
|
|
|
| add rdx, UPB_MAX_FIELDNUMBER
|
|
|
|
| // This key will never be in the array part, so do a hash lookup.
|
|
|
|
assert(has_hash_entries);
|
|
|
|
| ld64 dispatch
|
|
|
|
| jmp ->hashlookup // Tail call.
|
|
|
|
}
|
|
|
|
|
|
|
|
if (has_hash_entries) {
|
|
|
|
|7:
|
|
|
|
| // Hash table lookup.
|
|
|
|
| ld64 dispatch
|
|
|
|
| call ->hashlookup
|
|
|
|
| jmp <3
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void jittag(jitcompiler *jc, uint64_t tag, int n, int ofs,
|
|
|
|
const upb_pbdecodermethod *method) {
|
|
|
|
// Internally we parse unknown fields; if this runs us into DELIMEND we jump
|
|
|
|
// to the corresponding DELIMEND target (either msg end or repeated field
|
|
|
|
// end), which we find from the OP_CHECKDELIM which must have necessarily
|
|
|
|
// preceded us.
|
|
|
|
uint32_t last_instruction = *(jc->pc - 2);
|
|
|
|
int last_arg = (int32_t)last_instruction >> 8;
|
|
|
|
assert((last_instruction & 0xff) == OP_CHECKDELIM);
|
|
|
|
uint32_t *delimend = (jc->pc - 1) + last_arg;
|
|
|
|
const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
|
|
|
|
|
|
|
|
if (getop(*(jc->pc - 1)) == OP_TAGN) {
|
|
|
|
jc->pc += ptr_words;
|
|
|
|
}
|
|
|
|
|
|
|
|
| chkneob n, >1
|
|
|
|
|
|
|
|
| // OPT: this is way too much fallback code to put here.
|
|
|
|
| // Reduce and/or move to a separate section to make better icache usage.
|
|
|
|
| ld64 tag
|
|
|
|
| call ->checktag_fallback
|
|
|
|
| cmp eax, DECODE_MISMATCH
|
|
|
|
| je >3
|
|
|
|
| cmp eax, DECODE_EOF
|
|
|
|
| je =>jmptarget(jc, delimend)
|
|
|
|
| jmp >5
|
|
|
|
|
|
|
|
|1:
|
|
|
|
switch (n) {
|
|
|
|
case 1:
|
|
|
|
| cmp byte [PTR], tag
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
| cmp word [PTR], tag
|
|
|
|
break;
|
|
|
|
case 3:
|
|
|
|
| // OPT: Slightly more efficient code, but depends on an extra byte.
|
|
|
|
| // mov eax, dword [PTR]
|
|
|
|
| // shl eax, 8
|
|
|
|
| // cmp eax, tag << 8
|
|
|
|
| cmp word [PTR], (tag & 0xffff)
|
|
|
|
| jne >2
|
|
|
|
| cmp byte [PTR + 2], (tag >> 16)
|
|
|
|
|2:
|
|
|
|
break;
|
|
|
|
case 4:
|
|
|
|
| cmp dword [PTR], tag
|
|
|
|
break;
|
|
|
|
case 5:
|
|
|
|
| cmp dword [PTR], (tag & 0xffffffff)
|
|
|
|
| jne >3
|
|
|
|
| cmp byte [PTR + 4], (tag >> 32)
|
|
|
|
}
|
|
|
|
| je >4
|
|
|
|
|3:
|
|
|
|
if (ofs == 0) {
|
|
|
|
| call =>jmptarget(jc, &method->dispatch)
|
|
|
|
| test rax, rax
|
|
|
|
| jz =>jmptarget(jc, delimend)
|
|
|
|
| jmp rax
|
|
|
|
} else {
|
|
|
|
| jmp =>jmptarget(jc, jc->pc + ofs)
|
|
|
|
}
|
|
|
|
|4:
|
|
|
|
| add PTR, n
|
|
|
|
|5:
|
|
|
|
}
|
|
|
|
|
|
|
|
// Compile the bytecode to x64.
|
|
|
|
static void jitbytecode(jitcompiler *jc) {
|
|
|
|
upb_pbdecodermethod *method = NULL;
|
|
|
|
const upb_handlers *h = NULL;
|
|
|
|
for (jc->pc = jc->group->bytecode; jc->pc < jc->group->bytecode_end; ) {
|
|
|
|
int32_t instr = *jc->pc;
|
|
|
|
opcode op = instr & 0xff;
|
|
|
|
uint32_t arg = instr >> 8;
|
|
|
|
int32_t longofs = arg;
|
|
|
|
|
|
|
|
if (op != OP_SETDISPATCH) {
|
|
|
|
// Skipped for SETDISPATCH because it defines its own asmlabel for the
|
|
|
|
// dispatch code it emits.
|
|
|
|
asmlabel(jc, "0x%lx.%s", pcofs(jc), upb_pbdecoder_getopname(op));
|
|
|
|
|
|
|
|
// Skipped for SETDISPATCH because it should point at the function
|
|
|
|
// prologue, not the dispatch function that is emitted first.
|
|
|
|
// TODO: optimize this to only define pclabels that are actually used.
|
|
|
|
|=>define_jmptarget(jc, jc->pc):
|
|
|
|
}
|
|
|
|
|
|
|
|
jc->pc++;
|
|
|
|
|
|
|
|
switch (op) {
|
|
|
|
case OP_STARTMSG: {
|
|
|
|
upb_func *startmsg = gethandler(h, UPB_STARTMSG_SELECTOR);
|
|
|
|
if (startmsg) {
|
|
|
|
// bool startmsg(void *closure, const void *hd)
|
|
|
|
|1:
|
|
|
|
| mov ARG1_64, CLOSURE
|
|
|
|
| load_handler_data h, UPB_STARTMSG_SELECTOR
|
|
|
|
| callp startmsg
|
|
|
|
if (!alwaysok(h, UPB_STARTMSG_SELECTOR)) {
|
|
|
|
| test al, al
|
|
|
|
| jnz >2
|
|
|
|
| call ->suspend
|
|
|
|
| jmp <1
|
|
|
|
|2:
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
| nop
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OP_ENDMSG: {
|
|
|
|
upb_func *endmsg = gethandler(h, UPB_ENDMSG_SELECTOR);
|
|
|
|
|9:
|
|
|
|
if (endmsg) {
|
|
|
|
// bool endmsg(void *closure, const void *hd, upb_status *status)
|
|
|
|
| mov ARG1_64, CLOSURE
|
|
|
|
| load_handler_data h, UPB_ENDMSG_SELECTOR
|
|
|
|
| mov ARG3_64, DECODER->status
|
|
|
|
| callp endmsg
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OP_SETDISPATCH: {
|
|
|
|
uint32_t *op_pc = jc->pc - 1;
|
|
|
|
|
|
|
|
// Load info for new method.
|
|
|
|
upb_inttable *dispatch;
|
|
|
|
memcpy(&dispatch, jc->pc, sizeof(void*));
|
|
|
|
jc->pc += sizeof(void*) / sizeof(uint32_t);
|
|
|
|
// The OP_SETDISPATCH bytecode contains a pointer that is
|
|
|
|
// &method->dispatch; we want to go backwards and recover method.
|
|
|
|
method =
|
|
|
|
(void*)((char*)dispatch - offsetof(upb_pbdecodermethod, dispatch));
|
|
|
|
// May be NULL, in which case no handlers for this message will be found.
|
|
|
|
// OPT: we should do better by completely skipping the message in this
|
|
|
|
// case instead of parsing it field by field. We should also do the skip
|
|
|
|
// in the containing message's code.
|
|
|
|
h = method->dest_handlers_;
|
|
|
|
const char *msgname = upb_msgdef_fullname(upb_handlers_msgdef(h));
|
|
|
|
|
|
|
|
// Emit dispatch code for new method.
|
|
|
|
asmlabel(jc, "0x%lx.dispatch.%s", pcofs(jc), msgname);
|
|
|
|
jitdispatch(jc, method);
|
|
|
|
|
|
|
|
// Emit function prologue for new method.
|
|
|
|
asmlabel(jc, "0x%lx.parse.%s", pcofs(jc), msgname);
|
|
|
|
|=>define_jmptarget(jc, op_pc):
|
|
|
|
|=>define_jmptarget(jc, method):
|
|
|
|
| sub rsp, 8
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OP_PARSE_DOUBLE:
|
|
|
|
case OP_PARSE_FLOAT:
|
|
|
|
case OP_PARSE_INT64:
|
|
|
|
case OP_PARSE_UINT64:
|
|
|
|
case OP_PARSE_INT32:
|
|
|
|
case OP_PARSE_FIXED64:
|
|
|
|
case OP_PARSE_FIXED32:
|
|
|
|
case OP_PARSE_BOOL:
|
|
|
|
case OP_PARSE_UINT32:
|
|
|
|
case OP_PARSE_SFIXED32:
|
|
|
|
case OP_PARSE_SFIXED64:
|
|
|
|
case OP_PARSE_SINT32:
|
|
|
|
case OP_PARSE_SINT64:
|
|
|
|
jitprimitive(jc, op, h, arg);
|
|
|
|
break;
|
|
|
|
case OP_STARTSEQ:
|
|
|
|
case OP_STARTSUBMSG:
|
|
|
|
case OP_STARTSTR: {
|
|
|
|
upb_func *start = gethandler(h, arg);
|
|
|
|
if (start) {
|
|
|
|
// void *startseq(void *closure, const void *hd)
|
|
|
|
// void *startsubmsg(void *closure, const void *hd)
|
|
|
|
// void *startstr(void *closure, const void *hd, size_t size_hint)
|
|
|
|
|1:
|
|
|
|
| mov ARG1_64, CLOSURE
|
|
|
|
| load_handler_data h, arg
|
|
|
|
if (op == OP_STARTSTR) {
|
|
|
|
| mov ARG3_64, DELIMEND
|
|
|
|
| sub ARG3_64, PTR
|
|
|
|
}
|
|
|
|
| callp start
|
|
|
|
if (!alwaysok(h, arg)) {
|
|
|
|
| test rax, rax
|
|
|
|
| jnz >2
|
|
|
|
| call ->suspend
|
|
|
|
| jmp <1
|
|
|
|
|2:
|
|
|
|
}
|
|
|
|
| mov CLOSURE, rax
|
|
|
|
} else {
|
|
|
|
// TODO: nop is only required because of asmlabel().
|
|
|
|
| nop
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OP_ENDSEQ:
|
|
|
|
case OP_ENDSUBMSG:
|
|
|
|
case OP_ENDSTR: {
|
|
|
|
upb_func *end = gethandler(h, arg);
|
|
|
|
if (end) {
|
|
|
|
// bool endseq(void *closure, const void *hd)
|
|
|
|
// bool endsubmsg(void *closure, const void *hd)
|
|
|
|
// bool endstr(void *closure, const void *hd)
|
|
|
|
|1:
|
|
|
|
| mov ARG1_64, CLOSURE
|
|
|
|
| load_handler_data h, arg
|
|
|
|
| callp end
|
|
|
|
if (!alwaysok(h, arg)) {
|
|
|
|
| test al, al
|
|
|
|
| jnz >2
|
|
|
|
| call ->suspend
|
|
|
|
| jmp <1
|
|
|
|
|2:
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// TODO: nop is only required because of asmlabel().
|
|
|
|
| nop
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OP_STRING: {
|
|
|
|
upb_func *str = gethandler(h, arg);
|
|
|
|
| cmp PTR, DELIMEND
|
|
|
|
| je >4
|
|
|
|
|1:
|
|
|
|
| cmp PTR, DATAEND
|
|
|
|
| jne >2
|
|
|
|
| call ->suspend
|
|
|
|
| jmp <1
|
|
|
|
|2:
|
|
|
|
if (str) {
|
|
|
|
// size_t str(void *closure, const void *hd, const char *str, size_t n,
//            handle)  -- handle is DECODER->handle, passed in ARG5_64 below.
|
|
|
|
| mov ARG1_64, CLOSURE
|
|
|
|
| load_handler_data h, arg
|
|
|
|
| mov ARG3_64, PTR
|
|
|
|
| mov ARG4_64, DATAEND
|
|
|
|
| sub ARG4_64, PTR
|
|
|
|
| mov ARG5_64, qword DECODER->handle
|
|
|
|
| callp str
|
|
|
|
| add PTR, rax
|
|
|
|
if (!alwaysok(h, arg)) {
|
|
|
|
| cmp PTR, DATAEND
|
|
|
|
| je >3
|
|
|
|
| call ->strret_fallback
|
|
|
|
|3:
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
| mov PTR, DATAEND
|
|
|
|
}
|
|
|
|
| cmp PTR, DELIMEND
|
|
|
|
| jne <1
|
|
|
|
|4:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OP_PUSHTAGDELIM:
|
|
|
|
| mov FRAME->sink.closure, CLOSURE
|
|
|
|
| // This shouldn't need to be read, because tag-delimited fields
|
|
|
|
| // shouldn't have an OP_SETDELIM after them. But for the moment
|
|
|
|
| // non-packed repeated fields do OP_SETDELIM so they can share more
|
|
|
|
| // code with the packed code-path. If this is changed later, this
|
|
|
|
| // store can be removed.
|
|
|
|
| mov qword FRAME->end_ofs, 0
|
|
|
|
| add FRAME, sizeof(upb_pbdecoder_frame)
|
|
|
|
| cmp FRAME, DECODER->limit
|
|
|
|
| je ->err
|
|
|
|
| mov dword FRAME->groupnum, arg
|
|
|
|
break;
|
|
|
|
case OP_PUSHLENDELIM:
|
|
|
|
| call ->pushlendelim
|
|
|
|
break;
|
|
|
|
case OP_POP:
|
|
|
|
| sub FRAME, sizeof(upb_pbdecoder_frame)
|
|
|
|
| mov CLOSURE, FRAME->sink.closure
|
|
|
|
break;
|
|
|
|
case OP_SETDELIM:
|
|
|
|
// OPT: experiment with testing vs old offset to optimize away.
|
|
|
|
| mov DATAEND, DECODER->end
|
|
|
|
| add DELIMEND, FRAME->end_ofs
|
|
|
|
| cmp DELIMEND, DECODER->buf
|
|
|
|
| jb >1
|
|
|
|
| cmp DELIMEND, DATAEND
|
|
|
|
| ja >1 // OPT: try cmov.
|
|
|
|
| mov DATAEND, DELIMEND
|
|
|
|
|1:
|
|
|
|
break;
|
|
|
|
case OP_SETBIGGROUPNUM:
|
|
|
|
| mov dword FRAME->groupnum, *jc->pc++
|
|
|
|
break;
|
|
|
|
case OP_CHECKDELIM:
|
|
|
|
| cmp DELIMEND, PTR
|
|
|
|
| je =>jmptarget(jc, jc->pc + longofs)
|
|
|
|
break;
|
|
|
|
case OP_CALL:
|
|
|
|
| call =>jmptarget(jc, jc->pc + longofs)
|
|
|
|
break;
|
|
|
|
case OP_BRANCH:
|
|
|
|
| jmp =>jmptarget(jc, jc->pc + longofs);
|
|
|
|
break;
|
|
|
|
case OP_RET:
|
|
|
|
|9:
|
|
|
|
| add rsp, 8
|
|
|
|
| ret
|
|
|
|
break;
|
|
|
|
case OP_TAG1:
|
|
|
|
jittag(jc, (arg >> 8) & 0xff, 1, (int8_t)arg, method);
|
|
|
|
break;
|
|
|
|
case OP_TAG2:
|
|
|
|
jittag(jc, (arg >> 8) & 0xffff, 2, (int8_t)arg, method);
|
|
|
|
break;
|
|
|
|
case OP_TAGN: {
|
|
|
|
uint64_t tag;
|
|
|
|
memcpy(&tag, jc->pc, 8);
|
|
|
|
jittag(jc, tag, arg >> 8, (int8_t)arg, method);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OP_DISPATCH:
|
|
|
|
| call =>jmptarget(jc, &method->dispatch)
|
|
|
|
break;
|
|
|
|
case OP_HALT:
|
|
|
|
assert(false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
asmlabel(jc, "eof");
|
|
|
|
| nop
|
|
|
|
}
|