pull/13171/head
Joshua Haberman 4 years ago
parent 02ff6fb996
commit 763a3f6293
  1. 1
      BUILD
  2. 9
      upb/decode.c
  3. 23
      upb/decode.h
  4. 210
      upb/decode_fast.c

@ -79,6 +79,7 @@ cc_library(
name = "upb",
srcs = [
"upb/decode.c",
"upb/decode_fast.c",
"upb/encode.c",
"upb/msg.c",
"upb/msg.h",

@ -134,15 +134,6 @@ static const int8_t delim_ops[37] = {
OP_VARPCK_LG2(3), /* REPEATED SINT64 */
};
/* Data pertaining to the parse. */
typedef struct {
const char *limit; /* End of delimited region or end of buffer. */
/* NOTE(review): presumably the arena used for allocations made while
 * decoding -- not visible in this hunk, confirm. */
upb_arena *arena;
/* NOTE(review): looks like a nesting-depth counter for submessages/groups;
 * its direction (counts up or down) is not visible here -- confirm. */
int depth;
uint32_t end_group; /* Set to field number of END_GROUP tag, if any. */
/* NOTE(review): presumably the setjmp/longjmp target used to abort the parse
 * on error -- confirm against the code that calls setjmp(). */
jmp_buf err;
} upb_decstate;
typedef union {
bool bool_val;
uint32_t uint32_val;

@ -5,6 +5,8 @@
#ifndef UPB_DECODE_H_
#define UPB_DECODE_H_
#include <setjmp.h>
#include "upb/msg.h"
#ifdef __cplusplus
@ -14,6 +16,27 @@ extern "C" {
bool upb_decode(const char *buf, size_t size, upb_msg *msg,
const upb_msglayout *l, upb_arena *arena);
/* Internal only: data pertaining to the parse. */
typedef struct {
const char *limit; /* End of delimited region or end of buffer. */
/* Fast-path cutoff: fastdecode_dispatch() returns without dispatching once
 * the read pointer is >= fastlimit.
 * NOTE(review): the previous comment here was a verbatim copy of the one on
 * `limit`. How fastlimit is derived from limit (e.g. whether it leaves slack
 * for the multi-byte loads done by the fast parsers) is not visible in this
 * header -- confirm where it is initialised. */
const char *fastlimit;
/* NOTE(review): presumably the arena used for allocations made while
 * decoding -- confirm. */
upb_arena *arena;
/* NOTE(review): looks like a nesting-depth counter -- confirm. */
int depth;
uint32_t end_group; /* Set to field number of END_GROUP tag, if any. */
/* NOTE(review): presumably the setjmp/longjmp target for parse errors (this
 * header includes <setjmp.h> for the type) -- confirm. */
jmp_buf err;
} upb_decstate;
struct upb_fasttable;
/* Signature of a fast-path field parser stored in upb_fasttable.
 *
 *   d       - decoder state.
 *   ptr     - current read position; fastdecode_dispatch() passes the address
 *             of the tag it just loaded.
 *   msg     - message being filled in.
 *   table   - the fast table the parser was selected from.
 *   hasbits - accumulator threaded from parser to parser.
 *   data    - the slot's field_data word XOR-ed with the two bytes loaded at
 *             ptr by fastdecode_dispatch().
 *
 * The parsers visible in decode_fast.c return `ptr` unchanged when the tag
 * does not match, NULL for an over-long varint, and otherwise the result of
 * the next fastdecode_dispatch() call. */
typedef const char *_upb_field_parser(upb_decstate *d, const char *ptr,
upb_msg *msg, struct upb_fasttable *table,
uint64_t hasbits, uint64_t data);
/* Dispatch table for the fast decoder: 16 parser slots plus one 64-bit data
 * word per slot. fastdecode_dispatch() uses the same slot index for both
 * arrays and hands field_data[slot] (XOR-ed with the loaded tag bytes) to
 * field_parser[slot]. */
typedef struct upb_fasttable {
_upb_field_parser *field_parser[16];
/* The parsers in decode_fast.c compare the low tag bytes of the XOR-ed word
 * against zero and use bits 48..63 as a byte offset into the message. */
uint64_t field_data[16];
} upb_fasttable;
#ifdef __cplusplus
} /* extern "C" */
#endif

@ -0,0 +1,210 @@
#include "upb/decode.h"
#include "upb/port_def.inc"
/* Parameter list shared by every fast-path parser in this file. It mirrors
 * the parameters of _upb_field_parser, so functions declared with it can be
 * stored in upb_fasttable::field_parser. */
#define UPB_PARSE_PARAMS \
upb_decstate *d, const char *ptr, upb_msg *msg, upb_fasttable *table, \
uint64_t hasbits, uint64_t data
/* Selects and invokes the fast-path parser for the field whose tag starts at
 * `ptr`.
 *
 * Returns `ptr` unchanged once `ptr` has reached d->fastlimit; otherwise
 * returns whatever the selected parser returns.
 *
 * Two bytes are loaded from `ptr` into a uint16_t. The slot computation and
 * the XOR below treat the first input byte as the low byte of that value,
 * i.e. they assume a little-endian host.
 * NOTE(review): the two-byte load happens even when only one byte remains
 * before fastlimit; presumably fastlimit leaves slack for this -- confirm. */
UPB_NOINLINE
const char *fastdecode_dispatch(upb_decstate *d, const char *ptr, upb_msg *msg,
                                upb_fasttable *table, uint64_t hasbits) {
  uint16_t tag;
  uint64_t data;
  size_t slot;

  if (UPB_UNLIKELY(ptr >= d->fastlimit)) return ptr;
  memcpy(&tag, ptr, 2);

  /* In the first tag byte, bits 0..2 are the wire type and bit 7 is the
   * varint continuation bit, so bits 3..6 are the low four bits of the field
   * number: exactly enough to index the 16-entry tables. The earlier mask
   * (0xf7) kept bit 7 and dropped bit 3, producing even slots up to 30 and
   * reading past the end of field_data[] / field_parser[]. */
  slot = (size_t)(tag & 0x78) >> 3;

  /* A parser whose expected tag matches sees zero in the low tag bytes. */
  data = table->field_data[slot] ^ tag;
  return table->field_parser[slot](d, ptr, msg, table, hasbits, data);
}
UPB_FORCEINLINE bool fastdecode_checktag(uint64_t data, int tagbytes) {
const char zeros[2] = {0, 0};
return memcmp(&data, &zeros, tagbytes) == 0;
}
/* Fast path for a fixed-width scalar field.
 *
 *   tagbytes - size of the encoded tag (1 or 2).
 *   valbytes - size of the fixed-width payload copied into the message.
 *
 * If the tag check fails, `ptr` is returned untouched. Otherwise `valbytes`
 * bytes following the tag are copied to `msg` at the byte offset held in
 * bits 48..63 of `data`, `data` is OR-ed into `hasbits`, and parsing
 * continues through fastdecode_dispatch() at the byte after the payload. */
UPB_FORCEINLINE
static const char *fastdecode_scalarfixed(UPB_PARSE_PARAMS, int tagbytes,
                                          int valbytes) {
  if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {
    return ptr;
  }

  const char *payload = ptr + tagbytes;
  char *dst = (char *)msg + (data >> 48);

  hasbits |= data;
  memcpy(dst, payload, valbytes);

  return fastdecode_dispatch(d, payload + valbytes, msg, table, hasbits);
}
/* Table entry point: fixed-width scalar, 1-byte tag, 8-byte value. */
const char *upb_psf64_1bt(UPB_PARSE_PARAMS) {
return fastdecode_scalarfixed(d, ptr, msg, table, hasbits, data, 1, 8);
}
/* Table entry point: fixed-width scalar, 2-byte tag, 8-byte value. */
const char *upb_psf64_2bt(UPB_PARSE_PARAMS) {
return fastdecode_scalarfixed(d, ptr, msg, table, hasbits, data, 2, 8);
}
/* Table entry point: fixed-width scalar, 1-byte tag, 4-byte value. */
const char *upb_psf32_1bt(UPB_PARSE_PARAMS) {
return fastdecode_scalarfixed(d, ptr, msg, table, hasbits, data, 1, 4);
}
/* Table entry point: fixed-width scalar, 2-byte tag, 4-byte value. */
const char *upb_psf32_2bt(UPB_PARSE_PARAMS) {
return fastdecode_scalarfixed(d, ptr, msg, table, hasbits, data, 2, 4);
}
// Decodes a varint of two or more bytes and stores it into the message.
//
//   ptr      - address of the FIRST varint byte (the tag is already skipped).
//   data     - repurposed here: the destination field address, cast to an
//              integer by the caller (see fastdecode_scalarvarint).
//   res1     - the first varint byte, sign-extended. Its continuation bit
//              must be set (asserted below), so all bits above bit 6 are 1.
//   valbytes - number of bytes of the decoded value copied into the field.
//
// Returns NULL when the tenth byte is neither 0 nor 1 (value too long for a
// 64-bit varint); otherwise returns the result of fastdecode_dispatch() at
// the first byte after the varint.
//
// The value is truncated by memcpy'ing the first `valbytes` bytes of a
// uint64_t, which yields the low-order bytes only on a little-endian host.
UPB_FORCEINLINE
static const char *fastdecode_longvarint_impl(UPB_PARSE_PARAMS, int64_t res1,
int valbytes) {
// `data` carries the destination pointer on this path, not a table word.
char *field = (char *)data;
// The algorithm relies on sign extension to set all high bits when the varint
// continues. This way it can use "and" to aggregate in to the result.
const int8_t *p = (const int8_t*)(ptr);
// However this requires the low bits after shifting to be 1's as well. On
// x86_64 a shld from a single register filled with enough 1's in the high
// bits can accomplish all this in one instruction. It so happens that res1
// has 57 high bits of ones, which is enough for the largest shift done.
assert(res1 >> 7 == -1);
uint64_t ones = res1; // save the useful high bit 1's in res1
uint64_t byte;
int64_t res2, res3;
int sign_bit;
// SHLD(n): shift `byte` left by 7*n bits and fill the vacated low bits with
// the top 7*n bits of `ones`.
// NOTE(review): SHLD and SHLD_SIGN are not #undef'd after use, unlike DONE.
#define SHLD(n) byte = ((byte << (n * 7)) | (ones >> (64 - (n * 7))))
// Micro benchmarks show a substantial improvement to capture the sign
// of the result in the case of just assigning the result of the shift
// (ie first 2 steps).
#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__)
#define SHLD_SIGN(n) \
__asm__("shldq %3, %2, %1" \
: "=@ccs"(sign_bit), "+r"(byte) \
: "r"(ones), "i"(n * 7))
#else
#define SHLD_SIGN(n) \
do { \
SHLD(n); \
sign_bit = (int64_t)(byte) < 0; \
} while (0)
#endif
// Bytes 1 and 2 initialise res2 and res3 by plain assignment; sign_bit tells
// whether the byte just consumed had its continuation bit set.
byte = p[1];
SHLD_SIGN(1);
res2 = byte;
if (!sign_bit) goto done2;
byte = p[2];
SHLD_SIGN(2);
res3 = byte;
if (!sign_bit) goto done3;
// From byte 3 on, each shifted byte is and-ed into res1/res2/res3 in
// rotation. The accumulator is negative on entry, so it becomes non-negative
// exactly when the byte just merged had its continuation bit clear.
byte = p[3];
SHLD(3);
res1 &= byte;
if (res1 >= 0) goto done4;
byte = p[4];
SHLD(4);
res2 &= byte;
if (res2 >= 0) goto done5;
byte = p[5];
SHLD(5);
res3 &= byte;
if (res3 >= 0) goto done6;
byte = p[6];
SHLD(6);
res1 &= byte;
if (res1 >= 0) goto done7;
byte = p[7];
SHLD(7);
res2 &= byte;
if (res2 >= 0) goto done8;
byte = p[8];
SHLD(8);
res3 &= byte;
if (res3 >= 0) goto done9;
byte = p[9];
// Last byte only contains 0 or 1 for valid 64bit varints. If it's 0 it's
// a denormalized varint that shouldn't happen. The continuation bit of byte
// 9 has already the right value hence just expect byte to be 1.
if (UPB_LIKELY(byte == 1)) goto done10;
if (byte == 0) {
res3 ^= (uint64_t)(1) << 63;
goto done10;
}
return NULL; // Value is too long to be a varint64
// DONE(n): exit label for a varint that occupied n bytes. Combines the three
// accumulators, stores `valbytes` bytes into the field and resumes dispatch
// at p + n.
#define DONE(n) \
done##n : { \
uint64_t val = res1 & res2 & res3; \
memcpy(field, &val, valbytes); \
return fastdecode_dispatch(d, (const char *)p + n, msg, table, hasbits); \
};
// done2 is written out by hand because res3 has not been assigned yet when a
// two-byte varint terminates.
done2 : {
uint64_t val = res1 & res2;
memcpy(field, &val, valbytes);
return fastdecode_dispatch(d, (const char*)p + 2, msg, table, hasbits);
}
DONE(3)
DONE(4)
DONE(5)
DONE(6)
DONE(7)
DONE(8)
DONE(9)
DONE(10)
#undef DONE
}
/* Multi-byte varint path that stores a 4-byte value: forwards to
 * fastdecode_longvarint_impl with valbytes == 4. `val` is the sign-extended
 * first varint byte. Marked UPB_NOINLINE -- presumably to keep this slow path
 * out of the inlined single-byte parsers. */
UPB_NOINLINE
static const char *fastdecode_longvarint32(UPB_PARSE_PARAMS, int64_t val) {
return fastdecode_longvarint_impl(d, ptr, msg, table, hasbits, data, val, 4);
}
/* Multi-byte varint path that stores an 8-byte value: forwards to
 * fastdecode_longvarint_impl with valbytes == 8. `val` is the sign-extended
 * first varint byte. Marked UPB_NOINLINE -- presumably to keep this slow path
 * out of the inlined single-byte parsers. */
UPB_NOINLINE
static const char *fastdecode_longvarint64(UPB_PARSE_PARAMS, int64_t val) {
return fastdecode_longvarint_impl(d, ptr, msg, table, hasbits, data, val, 8);
}
/* Routes a multi-byte varint to the out-of-line decoder that matches the
 * destination width. `valbytes` must be 4 or 8; any other value reaches
 * UPB_UNREACHABLE(). `val` is the sign-extended first varint byte and is
 * forwarded unchanged, as are all the parse parameters. */
UPB_FORCEINLINE
static const char *fastdecode_longvarint(UPB_PARSE_PARAMS, int64_t val,
                                         int valbytes) {
  switch (valbytes) {
    case 4:
      return fastdecode_longvarint32(d, ptr, msg, table, hasbits, data, val);
    case 8:
      return fastdecode_longvarint64(d, ptr, msg, table, hasbits, data, val);
    default:
      break;
  }
  UPB_UNREACHABLE();
}
/* Fast path for a varint-encoded scalar field.
 *
 *   tagbytes - size of the encoded tag (1 or 2).
 *   valbytes - number of bytes of the decoded value stored in the message
 *              (4 or 8).
 *
 * If the tag check fails, `ptr` is returned untouched. Otherwise `data` is
 * OR-ed into `hasbits` and the destination is `msg` plus the byte offset held
 * in bits 48..63 of `data`. A single-byte varint is stored directly and
 * parsing continues through fastdecode_dispatch(); a longer one is handed to
 * fastdecode_longvarint(), which may return NULL for an over-long encoding. */
UPB_FORCEINLINE
static const char *fastdecode_scalarvarint(UPB_PARSE_PARAMS, int tagbytes,
                                           int valbytes) {
  int64_t val;
  void *field;

  if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) return ptr;
  ptr += tagbytes;
  hasbits |= data;
  field = (char *)msg + (data >> 48);

  /* Read the first byte as *signed* so that a set continuation bit makes
   * `val` negative. Plain `char` has implementation-defined signedness; where
   * it is unsigned, `*ptr` would never be negative and every multi-byte
   * varint would be stored as a single byte. The long-varint decoder reads
   * its input through `const int8_t *` for the same reason and requires a
   * sign-extended first byte. */
  val = *(const int8_t *)ptr;
  if (UPB_UNLIKELY(val < 0)) {
    /* The long path receives the destination address in the `data` slot.
     * Convert through uintptr_t so the pointer-to-integer cast is well
     * formed even where pointers are narrower than 64 bits. */
    return fastdecode_longvarint(d, ptr, msg, table, hasbits,
                                 (uint64_t)(uintptr_t)field, val, valbytes);
  }

  /* Stores the first `valbytes` bytes of `val`: the low-order bytes on a
   * little-endian host. */
  memcpy(field, &val, valbytes);
  return fastdecode_dispatch(d, ptr + 1, msg, table, hasbits);
}
/* Table entry point: varint scalar, 1-byte tag, 4-byte stored value. */
const char *upb_psv32_1bt(UPB_PARSE_PARAMS) {
return fastdecode_scalarvarint(d, ptr, msg, table, hasbits, data, 1, 4);
}
/* Table entry point: varint scalar, 2-byte tag, 4-byte stored value. */
const char *upb_psv32_2bt(UPB_PARSE_PARAMS) {
return fastdecode_scalarvarint(d, ptr, msg, table, hasbits, data, 2, 4);
}
/* Table entry point: varint scalar, 1-byte tag, 8-byte stored value. */
const char *upb_psv64_1bt(UPB_PARSE_PARAMS) {
return fastdecode_scalarvarint(d, ptr, msg, table, hasbits, data, 1, 8);
}
/* Table entry point: varint scalar, 2-byte tag, 8-byte stored value. */
const char *upb_psv64_2bt(UPB_PARSE_PARAMS) {
return fastdecode_scalarvarint(d, ptr, msg, table, hasbits, data, 2, 8);
}
Loading…
Cancel
Save