Let the JIT emit hasbit-setting code in addition to calling a callback.

This leads to a major (20-40%) improvement in the parsetoproto2
benchmark with small messages.  We are now faster than proto2 in all
apples-to-apples comparisons, at least given the (admittedly
limited) set of benchmarks in this source tree.
pull/13171/head
Joshua Haberman 14 years ago
parent a5e6a7b029
commit adb6580d97
  1. 9
      benchmarks/parsetoproto2.upb.cc
  2. 2
      upb/def.c
  3. 4
      upb/handlers.c
  4. 12
      upb/handlers.h
  5. 5
      upb/msg.c
  6. 21
      upb/pb/decoder_x86.dasc

@ -42,7 +42,6 @@ PROTO2_APPEND(bool, bool)
upb_flow_t proto2_setstr(void *m, upb_value fval, upb_value val) {
assert(m != NULL);
upb_stdmsg_sethas(m, fval);
upb_fielddef *f = upb_value_getfielddef(fval);
std::string **str = (std::string**)UPB_INDEX(m, f->offset, 1);
if (*str == f->default_ptr) *str = new std::string;
@ -72,7 +71,6 @@ upb_sflow_t proto2_startseq(void *m, upb_value fval) {
upb_sflow_t proto2_startsubmsg(void *m, upb_value fval) {
assert(m != NULL);
upb_fielddef *f = upb_value_getfielddef(fval);
upb_stdmsg_sethas(m, fval);
google::protobuf::Message *prototype = (google::protobuf::Message*)f->prototype;
void **subm = (void**)UPB_INDEX(m, f->offset, 1);
if (*subm == NULL || *subm == f->default_ptr)
@ -183,8 +181,13 @@ static void layout_msgdef_from_proto2(upb_msgdef *upb_md,
uint32_t hasbit = (r->has_bits_offset_ * 8) + proto2_f->index();
// Encapsulation violation END
if (upb_isseq(upb_f)) {
// proto2 does not store hasbits for repeated fields.
upb_f->hasbit = -1;
} else {
upb_f->hasbit = hasbit;
}
upb_f->offset = data_offset;
upb_f->hasbit = hasbit;
upb_fielddef_setaccessor(upb_f, proto2_accessor(upb_f));
if (upb_isstring(upb_f) && !upb_isseq(upb_f)) {

@ -212,7 +212,7 @@ upb_fielddef *upb_fielddef_new() {
f->finalized = false;
f->type = 0;
f->label = UPB_LABEL(OPTIONAL);
f->hasbit = 0;
f->hasbit = -1;
f->offset = 0;
f->number = 0; // not a valid field number.
f->hasdefault = false;

@ -31,7 +31,7 @@ static upb_fhandlers *_upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
if (f) abort();
upb_fhandlers new_f = {false, type, repeated,
repeated && upb_isprimitivetype(type), UPB_ATOMIC_INIT(0),
n, m, NULL, UPB_NO_VALUE, NULL, NULL, NULL, NULL, NULL,
n, -1, m, NULL, UPB_NO_VALUE, NULL, NULL, NULL, NULL, NULL,
#ifdef UPB_USE_JIT_X64
0, 0, 0,
#endif
@ -156,7 +156,7 @@ upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, upb_msgdef *m,
static upb_fhandlers toplevel_f = {
false, UPB_TYPE(GROUP), false, false, UPB_ATOMIC_INIT(0), 0,
NULL, NULL, // submsg
-1, NULL, NULL, // submsg
#ifdef NDEBUG
{{0}},
#else

@ -141,6 +141,7 @@ typedef struct _upb_fieldent {
bool is_repeated_primitive;
upb_atomic_t refcount;
uint32_t number;
int32_t valuehasbit;
struct _upb_mhandlers *msg;
struct _upb_mhandlers *submsg; // Set iff upb_issubmsgtype(type) == true.
upb_value fval;
@ -174,6 +175,11 @@ UPB_FHANDLERS_ACCESSORS(startseq, upb_startfield_handler*)
UPB_FHANDLERS_ACCESSORS(endseq, upb_endfield_handler*)
UPB_FHANDLERS_ACCESSORS(msg, struct _upb_mhandlers*)
UPB_FHANDLERS_ACCESSORS(submsg, struct _upb_mhandlers*)
// If set to >= 0, the hasbit will automatically be set after the corresponding
// callback is called (when a JIT is enabled, this can be significantly more
// efficient than setting the hasbit yourself inside the callback). Could add
// this for seq and submsg also, but doesn't look like a win at the moment.
UPB_FHANDLERS_ACCESSORS(valuehasbit, int32_t)
/* upb_mhandlers **************************************************************/
@ -357,11 +363,17 @@ INLINE upb_fhandlers *upb_dispatcher_lookup(upb_dispatcher *d, uint32_t n) {
void _upb_dispatcher_unwind(upb_dispatcher *d, upb_flow_t flow);
// Sets bit number "hasbit" in the bit array that starts at _p.  A negative
// hasbit means "no hasbit" and leaves the memory untouched.
INLINE void _upb_dispatcher_sethas(void *_p, int32_t hasbit) {
  if (hasbit < 0) return;
  // Operate on unsigned bytes: OR-ing 0x80 into a plain (possibly signed)
  // char and storing it back is an implementation-defined conversion.
  unsigned char *bytes = (unsigned char*)_p;
  bytes[hasbit / 8] |= (unsigned char)(1u << (hasbit % 8));
}
// Dispatch functions -- call the user handler and handle errors.
// Delivers one value to the field's "value" handler (when one is registered),
// then applies the field's automatic hasbit to the current closure, and
// finally unwinds the dispatcher if the handler asked for anything other than
// UPB_CONTINUE.
INLINE void upb_dispatch_value(upb_dispatcher *d, upb_fhandlers *f,
                               upb_value val) {
  upb_flow_t status;
  if (f->value != NULL) {
    status = f->value(d->top->closure, f->fval, val);
  } else {
    // No handler registered: behave as if it had returned UPB_CONTINUE.
    status = UPB_CONTINUE;
  }
  // The hasbit is applied regardless of the handler's return status.
  _upb_dispatcher_sethas(d->top->closure, f->valuehasbit);
  if (status == UPB_CONTINUE) return;
  _upb_dispatcher_unwind(d, status);
}
void upb_dispatch_startmsg(upb_dispatcher *d);

@ -102,7 +102,7 @@ bool upb_stdmsg_has(void *_m, upb_value fval) {
assert(_m != NULL); \
upb_fielddef *f = upb_value_getfielddef(fval); \
uint8_t *m = _m; \
upb_stdmsg_sethas(_m, fval); \
/* Hasbit is set automatically by the handlers. */ \
*(ctype*)&m[f->offset] = upb_value_get ## type(val); \
return UPB_CONTINUE; \
} \
@ -164,7 +164,7 @@ upb_flow_t upb_stdmsg_setstr(void *_m, upb_value fval, upb_value val) {
assert(_m != NULL);
char *m = _m;
upb_fielddef *f = upb_value_getfielddef(fval);
upb_stdmsg_sethas(_m, fval);
// Hasbit automatically set by the handlers.
_upb_stdmsg_setstr(&m[f->offset], val);
return UPB_CONTINUE;
}
@ -340,6 +340,7 @@ static void upb_accessors_onfreg(void *c, upb_fhandlers *fh, upb_fielddef *f) {
} else {
upb_fhandlers_setvalue(fh, f->accessor->set);
upb_fhandlers_setstartsubmsg(fh, f->accessor->startsubmsg);
upb_fhandlers_setvaluehasbit(fh, f->hasbit);
}
}
}

@ -126,7 +126,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
|.type FRAME, upb_dispatcher_frame, r13
|.type STRREF, upb_strref, r14
|.type DECODER, upb_decoder, r15
|.type STDARRAY, upb_stdarray, r15
|.type STDARRAY, upb_stdarray
|
|.macro callp, addr
|| if ((uintptr_t)addr < 0xffffffff) {
@ -273,6 +273,13 @@ void upb_reg_jit_gdb(upb_decoder *d) {
| mov64 ARG2_64, f->fval.val.uint64
|| }
|.endmacro
|
|// sethas reg, hasbit: when hasbit >= 0, ORs the mask (1 << (hasbit % 8))
|// into the byte at [reg + hasbit / 8].  The C-level "if" guard means no
|// instruction is generated for a negative hasbit.
|.macro sethas, reg, hasbit
|| if (hasbit >= 0) {
| or byte [reg + (hasbit / 8)], (1 << (hasbit % 8))
|| }
|.endmacro
#include <stdlib.h>
#include "upb/pb/varint.h"
@ -369,14 +376,6 @@ static void upb_decoder_jit_decodefield(upb_decoder *d, upb_mhandlers *m,
}
}
// Emits code that sets f's hasbit in the object addressed by ARG1_64.
// Does nothing when the field has no hasbit (f->hasbit < 0).
// DEPENDS: closure is in ARG1_64
static void upb_decoder_jit_sethas(upb_decoder *d, upb_fielddef *f) {
if (f->hasbit < 0) return;
// Split the bit index into a byte offset and a single-bit mask.
size_t byte = f->hasbit / 8;
uint8_t bit = 1 << (f->hasbit % 8);
// DynASM line: OR the mask into the byte at [ARG1_64 + byte].
| or byte [ARG1_64 + byte], bit
}
#if 0
// These appear not to speed things up, but keeping around for
// further experimentation.
@ -465,15 +464,12 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
f->value == &upb_stdmsg_setuint64 ||
f->value == &upb_stdmsg_setptr ||
f->value == &upb_stdmsg_setdouble) {
upb_decoder_jit_sethas(d, fd);
| mov [ARG1_64 + fd->offset], ARG3_64
} else if (f->value == &upb_stdmsg_setint32 ||
f->value == &upb_stdmsg_setuint32 ||
f->value == &upb_stdmsg_setfloat) {
upb_decoder_jit_sethas(d, fd);
| mov [ARG1_64 + fd->offset], ARG3_32
} else if (f->value == &upb_stdmsg_setbool) {
upb_decoder_jit_sethas(d, fd);
| mov [ARG1_64 + fd->offset], ARG3_8
#if 0
// These appear not to speed things up, but keeping around for
@ -495,6 +491,7 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
| loadfval f
| callp f->value
}
| sethas CLOSURE, f->valuehasbit
}
// TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
}

Loading…
Cancel
Save