Switch to non-branching varint decoder.

pull/13171/head
Joshua Haberman 14 years ago
parent 2c86e7eddb
commit 19517cc6f3
  1. 92
      src/upb_varint_decoder.h
  2. 10
      tests/test_varint.c
  3. 2
      tests/upb_test.h

@ -76,43 +76,69 @@ done:
return r; return r;
} }
#ifdef __SSE__ // Avoids branches for values >2-bytes.
INLINE upb_decoderet upb_decode_varint_nobranch1(const char *p) {
#include <xmmintrin.h> uint64_t b = 0;
upb_decoderet r = {p, 0};
// Avoids branches (this can very likely be improved). Requires SSE. memcpy(&b, r.p, 2);
INLINE upb_decoderet upb_decode_varint_nobranch(const char *p) { if ((b & 0x80) == 0) { r.val = (b & 0x7f); r.p = p + 1; return r; }
upb_decoderet r = {(void*)0, 0}; r.val = (b & 0x7f) | ((b & 0x7f00) >> 1);
__m128i val128 = _mm_loadu_si128((void*)p); r.p = p + 2;
unsigned int continuation_bits = _mm_movemask_epi8(val128); if ((b & 0x8000) == 0) return r;
unsigned int bsr_val = ~continuation_bits;
int varint_length = __builtin_ffs(bsr_val); // >2-byte varint.
if (varint_length > 10) return r; memcpy(&b, r.p, sizeof(b));
uint64_t cbits = b | 0x7f7f7f7f7f7f7f7fULL;
uint16_t twob; uint64_t stop_bit = ~cbits & (cbits+1);
memcpy(&twob, p, 2); b &= (stop_bit - 1);
twob &= 0x7f7f; b = ((b & 0x7f007f007f007f00) >> 1) | (b & 0x007f007f007f007f);
twob = ((twob & 0xff00) >> 1) | (twob & 0xff); b = ((b & 0xffff0000ffff0000) >> 2) | (b & 0x0000ffff0000ffff);
b = ((b & 0xffffffff00000000) >> 4) | (b & 0x00000000ffffffff);
uint64_t eightb; r.val |= b << 14;
memcpy(&eightb, p + 2, 8); r.p += (__builtin_ctzll(stop_bit) + 1) / 8;
eightb &= 0x7f7f7f7f7f7f7f7f; if (stop_bit == 0) {
eightb = ((eightb & 0xff00ff00ff00ff00) >> 1) | (eightb & 0x00ff00ff00ff00ff); // Error: unterminated varint.
eightb = ((eightb & 0xffff0000ffff0000) >> 2) | (eightb & 0x0000ffff0000ffff); upb_decoderet err_r = {(void*)0, 0};
eightb = ((eightb & 0xffffffff00000000) >> 4) | (eightb & 0x00000000ffffffff); return err_r;
}
uint64_t all_bits = twob | (eightb << 14);
int varint_bits = varint_length * 7;
uint64_t mask = varint_bits == 70 ? (uint64_t)-1 : (1ULL << (varint_bits)) - 1;
r.val = all_bits & mask;
r.p = p + varint_length;
return r; return r;
} }
#endif // Avoids branches for values >2-bytes.
// Decodes the varint that starts at p, without branching on the length of
// varints longer than two bytes.
//
// On success, returns the decoded value in .val and a pointer to the first
// byte after the varint in .p.  If the first two bytes and the following
// eight bytes all have their continuation bit (0x80) set, the varint is
// treated as unterminated and {NULL, 0} is returned.
//
// May read up to 10 bytes starting at p (2 bytes, then 8 more at p + 2 for
// varints longer than two bytes), regardless of the varint's actual length.
// NOTE(review): the byte-lane arithmetic below appears to assume a
// little-endian host (first byte in the low bits of b) -- confirm.
INLINE upb_decoderet upb_decode_varint_nobranch2(const char *p) {
  uint64_t b = 0;
  upb_decoderet r = {p, 0};
  memcpy(&b, r.p, 2);
  if ((b & 0x80) == 0) {
    // 1-byte varint.
    r.val = (b & 0x7f);
    r.p = p + 1;
    return r;
  }
  // Pack the 7-bit payloads of the first two bytes into the low 14 bits.
  r.val = (b & 0x7f) | ((b & 0x7f00) >> 1);
  r.p = p + 2;
  if ((b & 0x8000) == 0) return r;  // 2-byte varint.

  // >2-byte varint.
  memcpy(&b, r.p, sizeof(b));
  // Force every payload bit to 1 so that the lowest zero bit of cbits is the
  // first clear continuation bit; stop_bit isolates that single bit.
  uint64_t cbits = b | 0x7f7f7f7f7f7f7f7fULL;
  uint64_t stop_bit = ~cbits & (cbits + 1);
  if (stop_bit == 0) {
    // Error: unterminated varint.  This must be checked before calling
    // __builtin_ctzll(), whose result is undefined for a zero argument.
    upb_decoderet err_r = {(void*)0, 0};
    return err_r;
  }
  // Drop the continuation bits and everything past the terminating byte.
  b = (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1);
  // Squeeze out the gaps left by the continuation bits.  Adding a masked copy
  // of the low lanes multiplies them by 2, 4 and 16 respectively, i.e. shifts
  // them left by 1, 2 and 4 bits, so the 56 payload bits end up contiguous in
  // bits 7..62 of b.
  b += b & 0x007f007f007f007fULL;
  b += 3 * (b & 0x0000ffff0000ffffULL);
  b += 15 * (b & 0x00000000ffffffffULL);
  // The payload sits at bit 7; shifting by 7 more places it above the 14
  // bits already stored in r.val.
  r.val |= b << 7;
  // stop_bit is bit 7 of the terminating byte, so (ctz + 1) / 8 is the number
  // of bytes consumed from this 8-byte window.
  r.p += (__builtin_ctzll(stop_bit) + 1) / 8;
  return r;
}
// For now, always use the branch32 decoder. INLINE upb_decoderet upb_decode_varint_fast(const char *p) {
#define upb_decode_varint_fast upb_decode_varint_branch32 // Use nobranch2 on 64-bit, branch32 on 32-bit.
if (sizeof(long) == 8)
return upb_decode_varint_nobranch2(p);
else
return upb_decode_varint_branch32(p);
}
#ifdef __cplusplus #ifdef __cplusplus
} /* extern "C" */ } /* extern "C" */

@ -50,16 +50,14 @@ static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) {
TEST_VARINT_DECODER(branch32); TEST_VARINT_DECODER(branch32);
TEST_VARINT_DECODER(branch64); TEST_VARINT_DECODER(branch64);
#ifdef __SSE__ TEST_VARINT_DECODER(nobranch1);
TEST_VARINT_DECODER(nobranch); TEST_VARINT_DECODER(nobranch2);
#endif
int main() { int main() {
test_branch32(); test_branch32();
test_branch64(); test_branch64();
#ifdef __SSE__ test_nobranch1();
test_nobranch(); test_nobranch2();
#endif
} }
#if 0 #if 0

@ -7,6 +7,8 @@
#ifndef UPB_TEST_H_ #ifndef UPB_TEST_H_
#define UPB_TEST_H_ #define UPB_TEST_H_
#include <stdlib.h>
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif

Loading…
Cancel
Save