Refactor varint encoding/decoding.

14 years ago · 4a99abba12
parent 7cf5893dcc
commit 4a99abba12
4 changed files with 45 additions and 34 deletions
--- a/src/upb_decoder.c
+++ b/src/upb_decoder.c
@ -9,7 +9,7 @@
 #include <stddef.h>
 #include <stdlib.h>
 #include "upb_decoder.h"
-#include "upb_varint_decoder.h"
+#include "upb_varint.h"

 #ifdef UPB_USE_JIT_X64
 #define Dst_DECL upb_decoder *d
--- a/src/upb_decoder_x86.dasc
+++ b/src/upb_decoder_x86.dasc
@ -249,28 +249,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
 |.endmacro

 #include <stdlib.h>
-#include "upb_varint_decoder.h"
-
-static size_t upb_value_size(uint64_t val) {
-#ifdef __GNUC__
-  int high_bit = 63 - __builtin_clzll(val);  // 0-based, undef if val == 0.
-#else
-  int high_bit = 0;
-  uint64_t tmp = val;
-  while(tmp >>= 1) high_bit++;
-#endif
-  return val == 0 ? 1 : high_bit / 8 + 1;
-}
-
-static uint64_t upb_encode_varint(uint64_t val)
-{
-  uint64_t ret = 0;
-  for (int bitpos = 0; val; bitpos+=8, val >>=7) {
-    if (bitpos > 0) ret |= (1 << (bitpos-1));
-    ret |= (val & 0x7f) << bitpos;
-  }
-  return ret;
-}
+#include "upb_varint.h"

 // PTR should point to the beginning of the tag.
 static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_tag,
@ -493,7 +472,7 @@ static void upb_decoder_jit_msg(upb_decoder *d, upb_handlers_msgent *m) {
  for(int i = 0; i < num_keys; i++) {
    uint32_t key = keys[i];
    upb_handlers_fieldent *f = upb_inttable_lookup(&m->fieldtab, key);
-    uint32_t tag = upb_encode_varint(key);
+    uint32_t tag = upb_vencode(key);
    if (last_f) upb_decoder_jit_field(d, last_tag, tag, m, last_f, f);
    last_tag = tag;
    last_f = f;
--- a/src/upb_varint_decoder.h
+++ b/src/upb_varint_decoder.h
@ -4,11 +4,8 @@
 * Copyright (c) 2011 Google Inc.  See LICENSE for details.
 * Author: Josh Haberman <jhaberman@gmail.com>
 *
- * A number of routines for varint decoding (we keep them all around to have
- * multiple approaches available for benchmarking).  All of these functions
- * require the buffer to have at least 10 bytes available; if we don't know
- * for sure that there are 10 bytes, then there is only one viable option
- * (branching on every byte).
+ * A number of routines for varint manipulation (we keep them all around to
+ * have multiple approaches available for benchmarking).
 */

 #ifndef UPB_VARINT_DECODER_H_
@ -22,6 +19,8 @@
 extern "C" {
 #endif

+/* Decoding *******************************************************************/
+
 // All decoding functions return this struct by value.
 typedef struct {
  const char *p;  // NULL if the varint was unterminated.
@ -76,12 +75,21 @@ done:
  return r;
 }

+// Locates the end of the encoded varint held in v.  The result has a single
+// bit set: the continuation-bit position of the lowest-order byte whose
+// continuation bit is clear.  (result - 1) is therefore a mask that keeps
+// only the bits belonging to the varint.  The result is 0 when all eight
+// bytes have their continuation bit set, i.e. the varint is unterminated.
+INLINE uint64_t upb_get_vstopbit(uint64_t v) {
+  // Saturate the 7 payload bits of every byte so only continuation bits vary.
+  const uint64_t filled = v | 0x7f7f7f7f7f7f7f7fULL;
+  // Adding one carries through every all-ones byte and stops at the first
+  // clear continuation bit, which is the only bit set in both operands below.
+  const uint64_t carried = filled + 1;
+  return carried & ~filled;
+}
+// Returns a mask covering every bit below the stop bit of the encoded varint
+// v.  When upb_get_vstopbit() reports 0 (unterminated varint) the unsigned
+// subtraction wraps and the mask is all ones.
+INLINE uint64_t upb_get_vmask(uint64_t v) {
+  const uint64_t stop_bit = upb_get_vstopbit(v);
+  return stop_bit - 1;
+}
+
 // Decodes a varint of at most 8 bytes without branching (except for error).
 INLINE upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
  uint64_t b;
  memcpy(&b, r.p, sizeof(b));
-  uint64_t cbits = b | 0x7f7f7f7f7f7f7f7fULL;
-  uint64_t stop_bit = ~cbits & (cbits+1);
+  uint64_t stop_bit = upb_get_vstopbit(b);
  b &= (stop_bit - 1);
  b = ((b & 0x7f007f007f007f00) >> 1) | (b & 0x007f007f007f007f);
  b = ((b & 0xffff0000ffff0000) >> 2) | (b & 0x0000ffff0000ffff);
@ -100,8 +108,7 @@ INLINE upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
 INLINE upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
  uint64_t b;
  memcpy(&b, r.p, sizeof(b));
-  uint64_t cbits = b | 0x7f7f7f7f7f7f7f7fULL;
-  uint64_t stop_bit = ~cbits & (cbits + 1);
+  uint64_t stop_bit = upb_get_vstopbit(b);
  b =  (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1);
  b +=       b & 0x007f007f007f007fULL;
  b +=  3 * (b & 0x0000ffff0000ffffULL);
@ -148,6 +155,31 @@ INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) {
  return upb_vdecode_max8_massimino(r);
 }

+
+/* Encoding *******************************************************************/
+
+// Returns the number of whole bytes needed to hold val: 1 for values up to
+// 0xff, 2 for values up to 0xffff, and so on up to 8.  Zero still takes one
+// byte.
+INLINE size_t upb_value_size(uint64_t val) {
+  // Handle zero before anything else: __builtin_clzll(0) has an undefined
+  // result, so it must never be evaluated for this input.
+  if (val == 0) return 1;
+#ifdef __GNUC__
+  int high_bit = 63 - __builtin_clzll(val);  // 0-based index of top set bit.
+#else
+  int high_bit = 0;
+  uint64_t tmp = val;
+  while (tmp >>= 1) high_bit++;
+#endif
+  return (size_t)(high_bit / 8 + 1);
+}
+
+// Encodes val as a varint and returns the encoded bytes packed into a
+// uint64_t, first encoded byte in the least-significant position.  Every
+// byte except the last has its continuation (high) bit set.  Only 32-bit
+// inputs are supported: they need at most 5 encoded bytes, which always fit
+// in the 64-bit result.  Zero encodes as 0.
+INLINE uint64_t upb_vencode(uint32_t val) {
+  uint64_t ret = 0;
+  for (int bitpos = 0; val; bitpos += 8, val >>= 7) {
+    // A further 7-bit group follows, so mark the previous byte as continued.
+    // Both shifts are done in 64 bits: the fifth byte uses bitpos == 32,
+    // where an int/uint32_t shift would overflow or be undefined.
+    if (bitpos > 0) ret |= (uint64_t)1 << (bitpos - 1);
+    ret |= (uint64_t)(val & 0x7f) << bitpos;
+  }
+  return ret;
+}
+
+
 #ifdef __cplusplus
 }  /* extern "C" */
 #endif
--- a/tests/test_varint.c
+++ b/tests/test_varint.c
@ -4,7 +4,7 @@
 * Copyright (c) 2011 Google Inc.  See LICENSE for details.
 */

-#include "upb_varint_decoder.h"
+#include "upb_varint.h"
 #include "upb_test.h"

 static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) {