Refactor varint encoding/decoding.

pull/13171/head
Joshua Haberman 14 years ago
parent 7cf5893dcc
commit 4a99abba12
  1. 2
      src/upb_decoder.c
  2. 25
      src/upb_decoder_x86.dasc
  3. 50
      src/upb_varint.h
  4. 2
      tests/test_varint.c

@ -9,7 +9,7 @@
#include <stddef.h> #include <stddef.h>
#include <stdlib.h> #include <stdlib.h>
#include "upb_decoder.h" #include "upb_decoder.h"
#include "upb_varint_decoder.h" #include "upb_varint.h"
#ifdef UPB_USE_JIT_X64 #ifdef UPB_USE_JIT_X64
#define Dst_DECL upb_decoder *d #define Dst_DECL upb_decoder *d

@ -249,28 +249,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
|.endmacro |.endmacro
#include <stdlib.h> #include <stdlib.h>
#include "upb_varint_decoder.h" #include "upb_varint.h"
// Returns the number of bytes spanned by val, counting from byte 0 up to and
// including the byte that holds its highest set bit.  Zero is reported as
// occupying one byte.
static size_t upb_value_size(uint64_t val) {
  // Handle zero before touching the builtin: __builtin_clzll(0) is undefined.
  if (val == 0) return 1;
#ifdef __GNUC__
  int high_bit = 63 - __builtin_clzll(val);  // 0-based index of top set bit.
#else
  // Portable fallback: count how many times val can be shifted right.
  int high_bit = 0;
  uint64_t tmp = val;
  while (tmp >>= 1) high_bit++;
#endif
  return (size_t)(high_bit / 8 + 1);
}
// Encodes val as a varint and returns the encoded bytes packed into a
// uint64_t, with the first encoded byte in the least-significant byte.
// Each output byte carries 7 payload bits; bit 7 of every byte except the
// last is set as the continuation bit.
//
// Only encodings of up to 8 bytes fit in the return value, so val must be
// below 2^56.  For larger values only the bytes that fit are produced.
static uint64_t upb_encode_varint(uint64_t val)
{
  uint64_t ret = 0;
  // Bound bitpos so that no shift count ever reaches the width of uint64_t.
  for (int bitpos = 0; val != 0 && bitpos < 64; bitpos += 8, val >>= 7) {
    // Another group follows the previous byte, so mark it as continued.
    // The shift must be done in 64 bits: an int shift overflows at bit 31.
    if (bitpos > 0) ret |= (uint64_t)1 << (bitpos - 1);
    ret |= (val & 0x7f) << bitpos;
  }
  return ret;
}
// PTR should point to the beginning of the tag. // PTR should point to the beginning of the tag.
static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_tag, static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_tag,
@ -493,7 +472,7 @@ static void upb_decoder_jit_msg(upb_decoder *d, upb_handlers_msgent *m) {
for(int i = 0; i < num_keys; i++) { for(int i = 0; i < num_keys; i++) {
uint32_t key = keys[i]; uint32_t key = keys[i];
upb_handlers_fieldent *f = upb_inttable_lookup(&m->fieldtab, key); upb_handlers_fieldent *f = upb_inttable_lookup(&m->fieldtab, key);
uint32_t tag = upb_encode_varint(key); uint32_t tag = upb_vencode(key);
if (last_f) upb_decoder_jit_field(d, last_tag, tag, m, last_f, f); if (last_f) upb_decoder_jit_field(d, last_tag, tag, m, last_f, f);
last_tag = tag; last_tag = tag;
last_f = f; last_f = f;

@ -4,11 +4,8 @@
* Copyright (c) 2011 Google Inc. See LICENSE for details. * Copyright (c) 2011 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com> * Author: Josh Haberman <jhaberman@gmail.com>
* *
* A number of routines for varint decoding (we keep them all around to have * A number of routines for varint manipulation (we keep them all around to
* multiple approaches available for benchmarking). All of these functions * have multiple approaches available for benchmarking).
* require the buffer to have at least 10 bytes available; if we don't know
* for sure that there are 10 bytes, then there is only one viable option
* (branching on every byte).
*/ */
#ifndef UPB_VARINT_DECODER_H_ #ifndef UPB_VARINT_DECODER_H_
@ -22,6 +19,8 @@
extern "C" { extern "C" {
#endif #endif
/* Decoding *******************************************************************/
// All decoding functions return this struct by value. // All decoding functions return this struct by value.
typedef struct { typedef struct {
const char *p; // NULL if the varint was unterminated. const char *p; // NULL if the varint was unterminated.
@ -76,12 +75,21 @@ done:
return r; return r;
} }
// Locates the end of the encoded varint held in v.  The result has exactly
// one bit set: bit 7 of the first byte whose continuation bit is clear.
// Subtracting one from the result gives a mask covering only the bits that
// belong to the varint.  If every byte of v has its continuation bit set
// (the varint is unterminated), the result is 0.
INLINE uint64_t upb_get_vstopbit(uint64_t v) {
  // Force all payload bits to 1 so that only continuation bits can be 0.
  const uint64_t filled = v | 0x7f7f7f7f7f7f7f7fULL;
  // Adding one carries through the leading run of ones and lands on the
  // lowest clear continuation bit; keep just that bit.
  const uint64_t carried = filled + 1;
  return carried & ~filled;
}
// Returns a mask selecting every bit below the stop bit of the encoded
// varint v.  When upb_get_vstopbit() returns 0, the unsigned subtraction
// wraps and the mask is all ones.
INLINE uint64_t upb_get_vmask(uint64_t v) {
  uint64_t stop_bit = upb_get_vstopbit(v);
  return stop_bit - 1;
}
// Decodes a varint of at most 8 bytes without branching (except for error). // Decodes a varint of at most 8 bytes without branching (except for error).
INLINE upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) { INLINE upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
uint64_t b; uint64_t b;
memcpy(&b, r.p, sizeof(b)); memcpy(&b, r.p, sizeof(b));
uint64_t cbits = b | 0x7f7f7f7f7f7f7f7fULL; uint64_t stop_bit = upb_get_vstopbit(b);
uint64_t stop_bit = ~cbits & (cbits+1);
b &= (stop_bit - 1); b &= (stop_bit - 1);
b = ((b & 0x7f007f007f007f00) >> 1) | (b & 0x007f007f007f007f); b = ((b & 0x7f007f007f007f00) >> 1) | (b & 0x007f007f007f007f);
b = ((b & 0xffff0000ffff0000) >> 2) | (b & 0x0000ffff0000ffff); b = ((b & 0xffff0000ffff0000) >> 2) | (b & 0x0000ffff0000ffff);
@ -100,8 +108,7 @@ INLINE upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
INLINE upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) { INLINE upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
uint64_t b; uint64_t b;
memcpy(&b, r.p, sizeof(b)); memcpy(&b, r.p, sizeof(b));
uint64_t cbits = b | 0x7f7f7f7f7f7f7f7fULL; uint64_t stop_bit = upb_get_vstopbit(b);
uint64_t stop_bit = ~cbits & (cbits + 1);
b = (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1); b = (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1);
b += b & 0x007f007f007f007fULL; b += b & 0x007f007f007f007fULL;
b += 3 * (b & 0x0000ffff0000ffffULL); b += 3 * (b & 0x0000ffff0000ffffULL);
@ -148,6 +155,31 @@ INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) {
return upb_vdecode_max8_massimino(r); return upb_vdecode_max8_massimino(r);
} }
/* Encoding *******************************************************************/
// Returns the number of bytes spanned by val, counting from byte 0 up to and
// including the byte that holds its highest set bit.  Zero is reported as
// occupying one byte.
INLINE size_t upb_value_size(uint64_t val) {
  // Handle zero before touching the builtin: __builtin_clzll(0) is undefined.
  if (val == 0) return 1;
#ifdef __GNUC__
  int high_bit = 63 - __builtin_clzll(val);  // 0-based index of top set bit.
#else
  // Portable fallback: count how many times val can be shifted right.
  int high_bit = 0;
  uint64_t tmp = val;
  while (tmp >>= 1) high_bit++;
#endif
  return (size_t)(high_bit / 8 + 1);
}
// Encodes val as a varint and returns the encoded bytes packed into a
// uint64_t, with the first encoded byte in the least-significant byte.
// Each output byte carries 7 payload bits; bit 7 of every byte except the
// last is set as the continuation bit.
//
// Currently only works with 32-bit varints, which need at most 5 bytes.
INLINE uint64_t upb_vencode(uint32_t val) {
  uint64_t ret = 0;
  for (int bitpos = 0; val; bitpos += 8, val >>= 7) {
    // Another group follows the previous byte, so mark it as continued.
    // Shift in 64 bits: the fifth byte puts this bit at position 31, which
    // overflows a plain int.
    if (bitpos > 0) ret |= (uint64_t)1 << (bitpos - 1);
    // Widen before shifting: bitpos reaches 32 for the fifth byte, which is
    // not a valid shift count for a 32-bit operand.
    ret |= (uint64_t)(val & 0x7f) << bitpos;
  }
  return ret;
}
#ifdef __cplusplus #ifdef __cplusplus
} /* extern "C" */ } /* extern "C" */
#endif #endif

@ -4,7 +4,7 @@
* Copyright (c) 2011 Google Inc. See LICENSE for details. * Copyright (c) 2011 Google Inc. See LICENSE for details.
*/ */
#include "upb_varint_decoder.h" #include "upb_varint.h"
#include "upb_test.h" #include "upb_test.h"
static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) { static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) {

Loading…
Cancel
Save