Switch to non-branching varint decoder.

pull/13171/head
Joshua Haberman 14 years ago
parent 2c86e7eddb
commit 19517cc6f3
  1. 92
      src/upb_varint_decoder.h
  2. 10
      tests/test_varint.c
  3. 2
      tests/upb_test.h

@ -76,43 +76,69 @@ done:
return r;
}
#ifdef __SSE__
#include <xmmintrin.h>
// Avoids branches (this can very likely be improved). Requires SSE.
//
// Decodes a single varint starting at p using one 16-byte vector load to find
// its length. The only branch is the length check: when the computed length is
// greater than 10 bytes the result has a null pointer and a zero value.
// The function always loads 16 bytes at p, so that many bytes must be readable.
// NOTE(review): __m128i, _mm_loadu_si128 and _mm_movemask_epi8 are SSE2
// intrinsics (normally declared by <emmintrin.h>); confirm that the __SSE__
// guard and the <xmmintrin.h> include are sufficient on the target compilers.
INLINE upb_decoderet upb_decode_varint_nobranch(const char *p) {
// Start out with the error result (null pointer, zero value).
upb_decoderet r = {(void*)0, 0};
// Unaligned 16-byte load starting at p.
__m128i val128 = _mm_loadu_si128((void*)p);
// Bit i is the most significant bit of byte i, i.e. its continuation flag.
unsigned int continuation_bits = _mm_movemask_epi8(val128);
// After inversion the lowest set bit marks the first byte with no
// continuation flag.
unsigned int bsr_val = ~continuation_bits;
// ffs is 1-based, so this is the number of bytes the varint occupies.
int varint_length = __builtin_ffs(bsr_val);
if (varint_length > 10) return r;
// First two bytes: drop the continuation flags and close the one-bit gap so
// the two 7-bit groups form 14 contiguous bits.
uint16_t twob;
memcpy(&twob, p, 2);
twob &= 0x7f7f;
twob = ((twob & 0xff00) >> 1) | (twob & 0xff);
// Next eight bytes: drop the continuation flags, then squeeze out the gaps in
// three rounds (pairs of bytes, pairs of 16-bit lanes, pair of 32-bit halves),
// leaving 56 contiguous payload bits.
uint64_t eightb;
memcpy(&eightb, p + 2, 8);
eightb &= 0x7f7f7f7f7f7f7f7f;
eightb = ((eightb & 0xff00ff00ff00ff00) >> 1) | (eightb & 0x00ff00ff00ff00ff);
eightb = ((eightb & 0xffff0000ffff0000) >> 2) | (eightb & 0x0000ffff0000ffff);
eightb = ((eightb & 0xffffffff00000000) >> 4) | (eightb & 0x00000000ffffffff);
// Payload of bytes 0-1 sits in bits 0..13, payload of bytes 2-9 starts at bit 14.
uint64_t all_bits = twob | (eightb << 14);
// Keep only the bits that belong to this varint. A 10-byte varint gives 70
// bits, which is special-cased to an all-ones mask so that 1ULL is never
// shifted by 64 or more.
int varint_bits = varint_length * 7;
uint64_t mask = varint_bits == 70 ? (uint64_t)-1 : (1ULL << (varint_bits)) - 1;
r.val = all_bits & mask;
// Point just past the last byte of the varint.
r.p = p + varint_length;
// Decodes one varint starting at p; avoids branches for values >2-bytes.
//
// On success, .val holds the decoded value and .p points just past the last
// byte of the varint. If the two leading bytes and the following eight bytes
// all carry a continuation flag, the varint is unterminated and the result is
// {NULL, 0}.
//
// Requirements visible from the implementation:
//  - The bytes are copied into the low end of a uint64_t and tested with
//    masks such as 0x80 / 0x8000, which only lines up on a little-endian host.
//  - Two bytes are always read at p, and eight more at p + 2 whenever the
//    first two bytes both have their continuation flag set, so the caller
//    must make sure those bytes are readable.
INLINE upb_decoderet upb_decode_varint_nobranch1(const char *p) {
  uint64_t b = 0;
  upb_decoderet r = {p, 0};
  memcpy(&b, r.p, 2);
  // 1-byte varint.
  if ((b & 0x80) == 0) { r.val = (b & 0x7f); r.p = p + 1; return r; }
  // 2-byte varint: join the two 7-bit groups.
  r.val = (b & 0x7f) | ((b & 0x7f00) >> 1);
  r.p = p + 2;
  if ((b & 0x8000) == 0) return r;
  // >2-byte varint.
  memcpy(&b, r.p, sizeof(b));
  // Set every payload bit so that only the continuation flags can be zero;
  // the lowest zero bit is then the flag of the terminating byte.
  uint64_t cbits = b | 0x7f7f7f7f7f7f7f7fULL;
  uint64_t stop_bit = ~cbits & (cbits + 1);
  if (stop_bit == 0) {
    // Error: unterminated varint.
    // This must be tested before __builtin_ctzll() below, whose result is
    // undefined for a zero argument.
    upb_decoderet err_r = {(void*)0, 0};
    return err_r;
  }
  // Discard the terminating flag and everything after the varint.
  b &= (stop_bit - 1);
  // Drop the remaining continuation flags and squeeze the 7-bit groups
  // together: first within byte pairs, then 16-bit lanes, then 32-bit halves.
  b = ((b & 0x7f007f007f007f00ULL) >> 1) | (b & 0x007f007f007f007fULL);
  b = ((b & 0xffff0000ffff0000ULL) >> 2) | (b & 0x0000ffff0000ffffULL);
  b = ((b & 0xffffffff00000000ULL) >> 4) | (b & 0x00000000ffffffffULL);
  // The first two bytes already supplied bits 0..13.
  r.val |= b << 14;
  // stop_bit is bit 8k+7 of the k-th extra byte, so this advances by k+1.
  r.p += (__builtin_ctzll(stop_bit) + 1) / 8;
  return r;
}
#endif
// Decodes one varint starting at p; avoids branches for values >2-bytes.
// Same approach as a shift/mask compaction, but the 7-bit groups are packed
// with additions and small multiplications instead of shifts and ORs.
//
// On success, .val holds the decoded value and .p points just past the last
// byte of the varint. If the two leading bytes and the following eight bytes
// all carry a continuation flag, the varint is unterminated and the result is
// {NULL, 0}.
//
// Requirements visible from the implementation:
//  - The bytes are copied into the low end of a uint64_t and tested with
//    masks such as 0x80 / 0x8000, which only lines up on a little-endian host.
//  - Two bytes are always read at p, and eight more at p + 2 whenever the
//    first two bytes both have their continuation flag set, so the caller
//    must make sure those bytes are readable.
INLINE upb_decoderet upb_decode_varint_nobranch2(const char *p) {
  uint64_t b = 0;
  upb_decoderet r = {p, 0};
  memcpy(&b, r.p, 2);
  // 1-byte varint.
  if ((b & 0x80) == 0) { r.val = (b & 0x7f); r.p = p + 1; return r; }
  // 2-byte varint: join the two 7-bit groups.
  r.val = (b & 0x7f) | ((b & 0x7f00) >> 1);
  r.p = p + 2;
  if ((b & 0x8000) == 0) return r;
  // >2-byte varint.
  memcpy(&b, r.p, sizeof(b));
  // Set every payload bit so that only the continuation flags can be zero;
  // the lowest zero bit is then the flag of the terminating byte.
  uint64_t cbits = b | 0x7f7f7f7f7f7f7f7fULL;
  uint64_t stop_bit = ~cbits & (cbits + 1);
  if (stop_bit == 0) {
    // Error: unterminated varint.
    // This must be tested before __builtin_ctzll() below, whose result is
    // undefined for a zero argument.
    upb_decoderet err_r = {(void*)0, 0};
    return err_r;
  }
  // Strip all continuation flags and everything after the varint.
  b = (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1);
  // Each step adds a multiple of the low lane to itself, which moves that lane
  // left until it touches the lane above it. The flags were cleared, so the
  // additions never carry into a neighbouring lane.
  b += b & 0x007f007f007f007fULL;         // x2:  even bytes move up 1 bit.
  b += 3 * (b & 0x0000ffff0000ffffULL);   // x4:  low 16-bit lanes move up 2.
  b += 15 * (b & 0x00000000ffffffffULL);  // x16: low 32-bit half moves up 4.
  // The packed payload now starts at bit 7; the first two bytes already
  // supplied bits 0..13, so shift by 7 more to land at bit 14.
  r.val |= b << 7;
  // stop_bit is bit 8k+7 of the k-th extra byte, so this advances by k+1.
  r.p += (__builtin_ctzll(stop_bit) + 1) / 8;
  return r;
}
// For now, always use the branch32 decoder.
#define upb_decode_varint_fast upb_decode_varint_branch32
// Default varint decoder: picks an implementation from the width of `long`.
// The condition is a compile-time constant, so only one of the two calls is
// ever reachable in a given build.
INLINE upb_decoderet upb_decode_varint_fast(const char *p) {
  // 8-byte long -> nobranch2; anything else -> branch32.
  return (sizeof(long) == 8) ? upb_decode_varint_nobranch2(p)
                             : upb_decode_varint_branch32(p);
}
#ifdef __cplusplus
} /* extern "C" */

@ -50,16 +50,14 @@ static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) {
// One TEST_VARINT_DECODER line per decoder variant under test.
// NOTE(review): presumably each invocation defines a test_<name>() function
// that main() calls; the macro definition is not visible here, so confirm.
TEST_VARINT_DECODER(branch32);
TEST_VARINT_DECODER(branch64);
// The SSE-based decoder is only tested when __SSE__ is defined.
#ifdef __SSE__
TEST_VARINT_DECODER(nobranch);
#endif
TEST_VARINT_DECODER(nobranch1);
TEST_VARINT_DECODER(nobranch2);
// Test driver: calls the test function of every varint decoder variant in turn.
int main() {
  test_branch32();
  test_branch64();
#if defined(__SSE__)
  // Only called when __SSE__ is defined.
  test_nobranch();
#endif
  test_nobranch1();
  test_nobranch2();
  return 0;
}
#if 0

@ -7,6 +7,8 @@
#ifndef UPB_TEST_H_
#define UPB_TEST_H_
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif

Loading…
Cancel
Save