First version of an assembly language decoder.

It is slower than the C decoder for now because it
falls off the fast path too often.  But it can
successfully decode varints, fixed32 and fixed64.
pull/13171/head
Joshua Haberman 14 years ago
parent f1e1cc4695
commit d8b2154862
  1. 18
      Makefile
  2. 10
      src/upb.h
  3. 33
      src/upb_decoder.c
  4. 2
      src/upb_decoder.h
  5. 219
      src/upb_decoder_x64.asm
  6. 2
      src/upb_def.c
  7. 2
      src/upb_def.h
  8. 4
      src/upb_msg.h
  9. 2
      src/upb_table.c
  10. 13
      src/upb_table.h

@ -77,6 +77,9 @@ STREAM= \
src/upb_msg.c \
src/upb_glue.c \
ASMCORE= \
src/upb_decoder_x64.asm
# Parts of core that are yet to be converted.
OTHERSRC=src/upb_encoder.c src/upb_text.c
@ -114,8 +117,8 @@ LIBUPB=src/libupb.a
LIBUPB_PIC=src/libupb_pic.a
lib: $(LIBUPB)
OBJ=$(patsubst %.c,%.o,$(SRC))
PICOBJ=$(patsubst %.c,%.lo,$(SRC))
OBJ=$(patsubst %.c,%.o,$(SRC)) src/upb_decoder_x64.o
PICOBJ=$(patsubst %.c,%.lo,$(SRC)) src/upb_decoder_x64.lo
$(LIBUPB): $(OBJ)
$(E) AR $(LIBUPB)
$(Q) ar rcs $(LIBUPB) $(OBJ)
@ -135,12 +138,19 @@ $(LIBUPB_PIC): $(PICOBJ)
# critical path but gets very large when -O3 is used.
src/upb_def.o: src/upb_def.c
$(E) CC $<
$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $<
$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $<
src/upb_def.lo: src/upb_def.c
$(E) 'CC -fPIC' $<
$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< -fPIC
$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC
# Assemble the x86-64 fast-path decoder (non-PIC object) with NASM.
# NOTE(review): -f macho64 is macOS-specific; a Linux build would need -f elf64.
src/upb_decoder_x64.o: src/upb_decoder_x64.asm
	$(E) NASM $<
	$(Q) nasm -Ox $< -o $@ -f macho64
# Assemble the x86-64 fast-path decoder for the PIC library build.
# BUG FIX: the recipe previously wrote its output to src/upb_decoder_x64.o,
# clobbering the non-PIC object and never producing the .lo this rule claims
# to build (so $(LIBUPB_PIC) would always fail or relink stale objects).
# Using $@ writes to the actual target, src/upb_decoder_x64.lo.
# NOTE(review): -f macho64 is macOS-specific; a Linux build would need -f elf64.
src/upb_decoder_x64.lo: src/upb_decoder_x64.asm
	$(E) NASM $<
	$(Q) nasm -Ox $< -o $@ -f macho64
# Function to expand a wildcard pattern recursively.
rwildcard=$(strip $(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2)$(filter $(subst *,%,$2),$d)))

@ -29,6 +29,16 @@ extern "C" {
#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m)))
// Variadic stand-in for printf that does nothing.  DEBUGPRINTF resolves to
// this in NDEBUG builds so debug prints compile to no-op calls (note that
// the arguments are still evaluated at each call site).
INLINE void nop_printf(const char *fmt, ...) {
  (void)fmt;  // Silence the unused-parameter warning.
}
#ifdef NDEBUG
#define DEBUGPRINTF nop_printf
#else
#define DEBUGPRINTF printf
#endif
// Rounds val up to the next multiple of align.
INLINE size_t upb_align_up(size_t val, size_t align) {
return val % align == 0 ? val : val + align - (val % align);

@ -14,7 +14,7 @@
// If the return value is other than UPB_CONTINUE, that is what the last
// callback returned.
extern upb_flow_t upb_fastdecode(const char **p, const char *end,
upb_value_handler_t *value_cb, void *closure,
upb_value_handler_t value_cb, void *closure,
void *table, int table_size);
/* Pure Decoding **************************************************************/
@ -131,6 +131,7 @@ INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
// Advances the decoder's read position by len bytes.
// NOTE(review): no bounds check here -- callers presumably guarantee
// len <= upb_decoder_bufleft(d); confirm at call sites.
INLINE void upb_decoder_advance(upb_decoder *d, size_t len) {
d->ptr += len;
//d->bytes_parsed_slow += len;
}
INLINE size_t upb_decoder_bufleft(upb_decoder *d) {
@ -214,6 +215,7 @@ INLINE bool upb_decode_tag(upb_decoder *d, upb_tag *tag) {
if ((*(p++) & 0x80) == 0) goto done; // likely
slow:
// Decode a full varint starting over from ptr.
DEBUGPRINTF("tag was >2 bytes.\n");
if (!upb_decode_varint_slow(d, &val)) return false;
tag_int = upb_value_getint64(val);
p = d->ptr; // Trick the next line into not overwriting us.
@ -317,6 +319,8 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
d->end = NULL; // Force a buffer pull.
d->submsg_end = (void*)0x1; // But don't let end-of-message get triggered.
d->msgdef = d->top->msgdef;
//d->bytes_parsed_fast = 0;
//d->bytes_parsed_slow = 0;
// TODO: handle UPB_SKIPSUBMSG
#define CHECK_FLOW(expr) if ((expr) == UPB_BREAK) { /*assert(!upb_ok(status));*/ goto err; }
@ -335,11 +339,25 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
CHECK_FLOW(upb_pop(d));
}
//const char *ptr = d->ptr;
// Decodes as many fields as possible, updating d->ptr appropriately,
// before falling through to the slow(er) path.
//CHECK_FLOW(upb_fastdecode(&d->ptr, d->end,
// d->dispatcher->top->handlers.set->value,
// d->top->handlers.closure));
const char *end = UPB_MIN(d->end, d->submsg_end);
#ifdef USE_ASSEMBLY_FASTPATH
CHECK_FLOW(upb_fastdecode(&d->ptr, end,
d->dispatcher.top->handlers.set->value,
d->dispatcher.top->handlers.closure,
d->top->msgdef->itof.array,
d->top->msgdef->itof.array_size));
if (end - d->ptr < 12) {
DEBUGPRINTF("Off the fast path because <12 bytes of data\n");
} else {
DEBUGPRINTF("Off the fast path for some other reason.\n");
}
#endif
//if (d->ptr > ptr) {
// d->bytes_parsed_fast += (d->ptr - ptr);
//}
// Parse/handle tag.
upb_tag tag;
@ -348,6 +366,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
// Normal end-of-file.
upb_clearerr(status);
CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher));
//DEBUGPRINTF("bytes parsed fast: %d, bytes parsed slow; %d\n", d->bytes_parsed_fast, d->bytes_parsed_slow);
return;
} else {
if (status->code == UPB_EOF) {
@ -363,8 +382,10 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
upb_value val;
switch (tag.wire_type) {
case UPB_WIRE_TYPE_START_GROUP:
DEBUGPRINTF("parsing start group\n");
break; // Nothing to do now, below we will push appropriately.
case UPB_WIRE_TYPE_END_GROUP:
DEBUGPRINTF("parsing end group\n");
if(d->top->end_offset != UPB_GROUP_END_OFFSET) {
upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag.");
goto err;
@ -395,6 +416,8 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
}
upb_fielddef *f = e->f;
assert(e->field_type == f->type);
assert(e->native_wire_type == upb_types[f->type].native_wire_type);
if (tag.wire_type != e->native_wire_type) {
// TODO: Support packed fields.
@ -416,11 +439,13 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
// doesn't check this, and it would slow us down, so pass for now.
switch (e->field_type) {
case UPB_TYPE(MESSAGE):
DEBUGPRINTF("parsing submessage\n");
case UPB_TYPE(GROUP):
CHECK_FLOW(upb_push(d, f, val, e->field_type));
continue; // We have no value to dispatch.
case UPB_TYPE(STRING):
case UPB_TYPE(BYTES):
DEBUGPRINTF("parsing string\n");
CHECK(upb_decode_string(d, &val, &d->tmp));
break;
case UPB_TYPE(SINT32):

@ -70,6 +70,8 @@ struct _upb_decoder {
// Msgdef for the current level.
upb_msgdef *msgdef;
size_t bytes_parsed_fast, bytes_parsed_slow;
};
// A upb_decoder decodes the binary protocol buffer format, writing the data it

@ -0,0 +1,219 @@
; upb_decoder_x64.asm -- x86-64 (NASM) fast-path protobuf decoder for upb.
; Decodes 1-2 byte tags, looks the field up in an array-backed table, and
; dispatches on field type to decode varint/fixed32/fixed64 values, invoking
; the value callback for each.  Anything it can't handle bails to the C slow
; path by returning UPB_CONTINUE; *p is written back after each successfully
; dispatched value (inside call_callback).
DEFAULT REL ; Default to RIP-relative addressing instead of absolute.
; Varint helper defined elsewhere; see the .varint handler for its assumed
; register contract.
extern _upb_decode_varint_fast64
SECTION .data
; Our dispatch table; used to jump to the right handler, keyed on the field's
; type.  Entry order mirrors the descriptor type numbers annotated below,
; with slot 0 reserved for "field not found".
dispatch_table:
dq _upb_fastdecode.cant_fast_path ; field not in table (type == 0). (check_4).
dq _upb_fastdecode.fixed64 ; double
dq _upb_fastdecode.fixed32 ; float
dq _upb_fastdecode.varint ; int64
dq _upb_fastdecode.varint ; uint64
dq _upb_fastdecode.varint ; int32
dq _upb_fastdecode.fixed64 ; fixed64
dq _upb_fastdecode.fixed32 ; fixed32
dq _upb_fastdecode.varint ; bool
dq _upb_fastdecode.cant_fast_path ; string (TODO)
dq _upb_fastdecode.cant_fast_path ; group (check_6)
dq _upb_fastdecode.cant_fast_path ; message
dq _upb_fastdecode.cant_fast_path ; bytes (TODO)
dq _upb_fastdecode.varint ; uint32
dq _upb_fastdecode.varint ; enum
dq _upb_fastdecode.fixed32 ; sfixed32
dq _upb_fastdecode.fixed64 ; sfixed64
dq _upb_fastdecode.varint_sint32 ; sint32
dq _upb_fastdecode.varint_sint64 ; sint64
; NOTE(review): _upb_decode_fast is declared GLOBAL here but never defined in
; this file; the entry point actually defined (and separately made global
; below) is _upb_fastdecode.  Confirm whether this line is stale.
GLOBAL _upb_decode_fast
SECTION .text
; Register allocation.  All six are callee-save under the SysV AMD64 ABI, so
; they survive the calls to the varint helper and the value callback.
%define BUF rbx ; const char *p, current buf position.
%define END rbp ; const char *end, where the buf ends (either submsg end or buf end)
%define BUF_ADDR r12 ; const char **p from the prototype; final position is written back through it.
%define FIELDDEF r13 ; upb_fielddef *f, needs to be preserved across varint decoding call.
%define CALLBACK r14 ; upb_value_handler_t value_cb.
%define CLOSURE r15 ; void *closure, forwarded to the callback.
; Stack layout: *tableptr, uint32_t maxfield_times_8
%define STACK_SPACE 24 ; this value + 8 must be a multiple of 16.
%define TABLE_SPILL [rsp] ; our lookup table, indexed by field number.
%define MAXFIELD_TIMES_8_SPILL [rsp+8]
; Executing the fast path requires the following conditions:
; - check_1: there are >=12 bytes left (<=2 byte tag and <=10 byte varint).
; - check_2: the tag is <= 2 bytes.
; - check_3: the field number is <= the table size
; (ie. it must be an array lookup, not a hash lookup).
; - check_4: the field is known (found in the table).
; - check_5: the wire type we read is correct for the field number,
; ("packed" fields are not accepted, yet. this could be handled
; efficiently by doing an extra check on the "type check failed"
; path that goes into a tight loop if the encoding was packed).
; - check_6: the field is not a group or a message (or string, TODO)
; (this could be relaxed, but due to delegation it's a bit tricky).
; - if the value is a string, the entire string is available in
; the buffer, and our cached string object can be recycled.
%macro decode_and_dispatch_ 0
align 16
.decode_and_dispatch:
; Load a few values we'll need in a sec.
mov r8, TABLE_SPILL
mov r9d, MAXFIELD_TIMES_8_SPILL
mov rax, END
sub rax, BUF
cmp rax, 12
jb _upb_fastdecode.cant_fast_path ; check_1 (<12 bytes left).
; Decode a 1 or 2-byte varint -> eax.
mov cl, byte [BUF]
lea rdi, [BUF+1]
movzx rax, cl ; Need all of rax since we're doing a 64-bit lea later.
and eax, 0x7f
test cl, cl
jns .one_byte_tag ; Should be predictable if fields are in order.
movzx ecx, byte [BUF+1]
lea rdi, [BUF+2]
mov edx, ecx
and edx, 0x7f
shl edx, 7
or eax, edx
; BUG FIX: this previously tested al (bit 7 of the *assembled* tag value,
; which is bit 0 of the second byte), not the second byte's continuation
; bit.  That bailed to the slow path on many valid 2-byte tags and, worse,
; accepted unterminated tags whose second byte had bit 0 clear.  Testing
; cl (the raw second byte, loaded by the movzx above) checks the actual
; continuation bit.
test cl, cl
js _upb_fastdecode.cant_fast_path ; check_2 (tag was >2 bytes).
.one_byte_tag:
mov BUF, rdi
; Decode tag and dispatch.
mov ecx, eax
and eax, 0x3ff8 ; eax now contains field number * 8
lea r11, [r8+rax*2] ; *2 is really *16, since rax is already *8.
and ecx, 0x7 ; ecx now contains wire type.
cmp eax, r9d
jae _upb_fastdecode.cant_fast_path ; check_3 (field number > table size)
mov FIELDDEF, [r11+8] ; Lookup fielddef (upb_itof_ent.f)
movzx rdx, BYTE [r11+1] ; Lookup field type.
mov rax, qword dispatch_table
jmp [rax+rdx*8]
%endmacro
; Jump-to-loop variant: handlers use this to re-enter the single expanded
; copy of the dispatch loop (emitted once, by decode_and_dispatch_ above).
%macro decode_and_dispatch 0
jmp .decode_and_dispatch
%endmacro
%macro call_callback 0
; Value arg must already be in rdx when macro is called.
mov rdi, CLOSURE
mov rsi, FIELDDEF
mov rcx, 33 ; RAW; we could pass the correct type, or only do this in non-debug modes.
call CALLBACK
mov [BUF_ADDR], BUF ; Write back buf position after each dispatched value.
cmp eax, 0
jne .done ; Caller requested BREAK or SKIPSUBMSG.
%endmacro
%macro check_type 1
cmp ecx, %1
jne _upb_fastdecode.cant_fast_path ; check_5 (wire type check failed).
%endmacro
; NOTE(review): check_type is never invoked by the handlers below, so check_5
; is not actually enforced on the fast path yet -- a wire-type mismatch would
; mis-decode rather than bail.  Confirm whether this is pending work.
; extern upb_flow_t upb_fastdecode(const char **p, const char *end,
; upb_value_handler_t value_cb, void *closure,
; void *table, int table_size);
; Entry point.  Saves all callee-save registers, loads the six C arguments
; into the named registers/spill slots, then enters the decode loop.
; Returns a upb_flow_t in rax: 0 (UPB_CONTINUE) when we bail to the C slow
; path, otherwise the nonzero value the callback returned (BREAK/SKIPSUBMSG).
align 16
global _upb_fastdecode
_upb_fastdecode:
; We use all callee-save regs.
push rbx
push rbp
push r12
push r13
push r14
push r15
sub rsp, STACK_SPACE
; Parse arguments into reg vals and stack.
mov BUF_ADDR, rdi
mov BUF, [rdi]
mov END, rsi
mov CALLBACK, rdx
mov CLOSURE, rcx
mov TABLE_SPILL, r8
shl r9, 3 ; table_size -> table_size*8, matching the fieldnum*8 index.
mov MAXFIELD_TIMES_8_SPILL, r9
decode_and_dispatch
; Handler: plain varints (int64/uint64/int32/uint32/bool/enum).
; Assumed contract of _upb_decode_varint_fast64, inferred from usage here:
; takes the buffer pointer in rdi, returns the advanced pointer in rax
; (0 if the varint was unterminated) and the decoded value in rdx.
; TODO(review): confirm against that function's definition.
; Note this handler ends with decode_and_dispatch_ (trailing underscore),
; which expands the one real copy of the dispatch loop that the other
; handlers jump back to.
align 16
.varint:
call _upb_decode_varint_fast64 ; BUF is already in rdi.
test rax, rax
jz _upb_fastdecode.cant_fast_path ; Varint was unterminated, slow path will handle error.
mov BUF, rax
call_callback ; rdx already holds value.
decode_and_dispatch_
; Handler: 4-byte fixed-width values (float/fixed32/sfixed32).
align 16
.fixed32:
mov edx, DWORD [BUF] ; Might be unaligned, but that's ok.
add BUF, 4
call_callback
decode_and_dispatch
; Handler: 8-byte fixed-width values (double/fixed64/sfixed64).
align 16
.fixed64:
mov rdx, QWORD [BUF] ; Might be unaligned, but that's ok.
add BUF, 8
call_callback
decode_and_dispatch
; Handler: sint32 -- varint then 32-bit zig-zag: edx = (n >> 1) ^ -(n & 1).
align 16
.varint_sint32:
call _upb_decode_varint_fast64 ; BUF is already in rdi.
test rax, rax
jz _upb_fastdecode.cant_fast_path ; Varint was unterminated, slow path will handle error.
mov BUF, rax
; Perform 32-bit zig-zag decoding.
mov ecx, edx
shr edx, 1
and ecx, 0x1
neg ecx
xor edx, ecx
call_callback
decode_and_dispatch
; Handler: sint64 -- varint then 64-bit zig-zag: rdx = (n >> 1) ^ -(n & 1).
align 16
.varint_sint64:
call _upb_decode_varint_fast64 ; BUF is already in rdi.
test rax, rax
jz _upb_fastdecode.cant_fast_path ; Varint was unterminated, slow path will handle error.
mov BUF, rax
; Perform 64-bit zig-zag decoding.
mov rcx, rdx
shr rdx, 1
and ecx, 0x1
neg rcx
xor rdx, rcx
call_callback
decode_and_dispatch
; Bail-out: a fast-path precondition failed.  Return UPB_CONTINUE so the C
; decoder resumes; note *p is only written back inside call_callback, so it
; holds the position just after the last successfully dispatched value.
.cant_fast_path:
mov rax, 0 ; UPB_CONTINUE -- continue as before.
.done:
; If coming via done, preserve the user callback's return in rax.
add rsp, STACK_SPACE
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
ret

@ -615,7 +615,7 @@ static upb_flow_t upb_fielddef_endmsg(void *_b) {
// Field was successfully read, add it as a field of the msgdef.
upb_msgdef *m = upb_defbuilder_top(b);
upb_itof_ent itof_ent = {0, upb_types[f->type].native_wire_type, f->type, f};
upb_itof_ent itof_ent = {0, f->type, upb_types[f->type].native_wire_type, f};
upb_ntof_ent ntof_ent = {{f->name, 0}, f};
upb_inttable_insert(&m->itof, f->number, &itof_ent);
upb_strtable_insert(&m->ntof, &ntof_ent.e);

@ -163,8 +163,8 @@ typedef struct _upb_msgdef {
// Hash table entries for looking up fields by name or number.
typedef struct {
bool junk;
upb_wire_type_t native_wire_type;
upb_fieldtype_t field_type;
upb_wire_type_t native_wire_type;
upb_fielddef *f;
} upb_itof_ent;
typedef struct {

@ -97,11 +97,13 @@ INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) {
INLINE void upb_value_write(upb_valueptr ptr, upb_value val,
upb_fieldtype_t ft) {
#ifndef NDEBUG
if (ft == UPB_VALUETYPE_ARRAY) {
assert(val.type == UPB_VALUETYPE_ARRAY);
} else {
} else if (val.type != UPB_VALUETYPE_RAW) {
assert(val.type == upb_types[ft].inmemory_type);
}
#endif
#define CASE(t, member_name) \
case UPB_TYPE(t): *ptr.member_name = val.val.member_name; break;

@ -188,7 +188,7 @@ void upb_inttable_compact(upb_inttable *t) {
while ((1UL << lg2_array) < largest_key) ++lg2_array;
++lg2_array; // Undo the first iteration.
size_t array_size;
int array_count;
int array_count = 0;
while (lg2_array > 0) {
array_size = (1 << --lg2_array);
//printf("Considering size %d (btw, our table has %d things total)\n", array_size, upb_inttable_count(t));

@ -123,18 +123,21 @@ INLINE void *_upb_inttable_fastlookup(upb_inttable *t, uint32_t key,
upb_inttable_value *arrval =
(upb_inttable_value*)UPB_INDEX(t->array, key, value_size);
if (_upb_inttable_isarrkey(t, key)) {
//printf("array lookup for key %d, &val=%p, has_entry=%d\n", key, val, val->has_entry);
//DEBUGPRINTF("array lookup for key %d, &val=%p, has_entry=%d\n", key, val, val->has_entry);
return (arrval->has_entry) ? arrval : NULL;
}
uint32_t bucket = _upb_inttable_bucket(t, key);
upb_inttable_entry *e =
(upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size);
//printf("looking in first bucket %d, entry size=%zd, addr=%p\n", bucket, entry_size, e);
//DEBUGPRINTF("looking in first bucket %d, entry size=%zd, addr=%p\n", bucket, entry_size, e);
while (1) {
//printf("%d, %d, %d\n", e->val.has_entry, e->hdr.key, key);
if (e->hdr.key == key) return &e->val;
//DEBUGPRINTF("%d, %d, %d\n", e->val.has_entry, e->hdr.key, key);
if (e->hdr.key == key) {
DEBUGPRINTF("returning val from hash part\n");
return &e->val;
}
if ((bucket = e->hdr.next) == UPB_END_OF_CHAIN) return NULL;
//printf("looking in bucket %d\n", bucket);
//DEBUGPRINTF("looking in bucket %d\n", bucket);
e = (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size);
}
}

Loading…
Cancel
Save