protobuf/upb/pb/varint.h

/*
 * upb - a minimalist implementation of protocol buffers.
 *
 * Copyright (c) 2011 Google Inc.  See LICENSE for details.
 * Author: Josh Haberman <jhaberman@gmail.com>
 *
 * A number of routines for varint manipulation (we keep them all around to
 * have multiple approaches available for benchmarking).
 */

#ifndef UPB_VARINT_DECODER_H_
#define UPB_VARINT_DECODER_H_

#include <stdint.h>
#include <string.h>
#include "upb/upb.h"

#ifdef __cplusplus
extern "C" {
#endif

// A list of types as they are encoded on-the-wire.  The explicit values are
// the numeric wire-type codes, so an enumerator can be compared directly
// against a wire type read from the input (see the protobuf encoding
// documentation for the authoritative table).
typedef enum {
  UPB_WIRE_TYPE_VARINT      = 0,  // Base-128 varint.
  UPB_WIRE_TYPE_64BIT       = 1,  // Fixed-width 64-bit value.
  UPB_WIRE_TYPE_DELIMITED   = 2,  // Length-delimited payload.
  UPB_WIRE_TYPE_START_GROUP = 3,  // Start-of-group marker.
  UPB_WIRE_TYPE_END_GROUP   = 4,  // End-of-group marker.
  UPB_WIRE_TYPE_32BIT       = 5,  // Fixed-width 32-bit value.
} upb_wiretype_t;

// The maximum number of bytes that it takes to encode a 64-bit varint.
// Each encoded byte carries 7 payload bits plus a continuation bit, so 64
// bits need ceil(64 / 7) == 10 bytes.
// Note that with a better encoding this could be 9 (TODO: write up a
// wiki document about this).
#define UPB_PB_VARINT_MAX_LEN 10

/* Zig-zag encoding/decoding **************************************************/

// Zig-zag decodes a 32-bit value: the low bit of n selects the sign and the
// remaining bits hold the magnitude (0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...).
INLINE int32_t upb_zzdec_32(uint32_t n) {
  const uint32_t magnitude = n >> 1;
  // 0 when the low bit is clear, -1 (all ones) when it is set.
  const int32_t sign_mask = -(int32_t)(n & 1);
  return magnitude ^ sign_mask;
}
// Zig-zag decodes a 64-bit value: the low bit of n selects the sign and the
// remaining bits hold the magnitude (0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...).
INLINE int64_t upb_zzdec_64(uint64_t n) {
  const uint64_t magnitude = n >> 1;
  // 0 when the low bit is clear, -1 (all ones) when it is set.
  const int64_t sign_mask = -(int64_t)(n & 1);
  return magnitude ^ sign_mask;
}
// Zig-zag encodes a signed 32-bit value so that values of small magnitude get
// small unsigned encodings (0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...).
//
// All arithmetic is done on the unsigned bit pattern: left-shifting a negative
// (or overflowing) signed value is undefined behavior in C, and right-shifting
// a negative value is implementation-defined.
INLINE uint32_t upb_zzenc_32(int32_t n) {
  const uint32_t bits = (uint32_t)n;
  // All ones when n is negative, zero otherwise.
  const uint32_t sign_mask = (uint32_t)0 - (bits >> 31);
  return (uint32_t)(bits << 1) ^ sign_mask;
}
// Zig-zag encodes a signed 64-bit value so that values of small magnitude get
// small unsigned encodings (0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...).
//
// All arithmetic is done on the unsigned bit pattern: left-shifting a negative
// (or overflowing) signed value is undefined behavior in C, and right-shifting
// a negative value is implementation-defined.
INLINE uint64_t upb_zzenc_64(int64_t n) {
  const uint64_t bits = (uint64_t)n;
  // All ones when n is negative, zero otherwise.
  const uint64_t sign_mask = (uint64_t)0 - (bits >> 63);
  return (bits << 1) ^ sign_mask;
}

/* Decoding *******************************************************************/

// All decoding functions return this struct by value.  The same struct is
// also passed into the upb_vdecode_max8_* functions to hand over a partially
// decoded varint.
typedef struct {
  // Position just past the bytes consumed so far.
  const char *p;  // NULL if the varint was unterminated.
  // Payload bits decoded so far, least-significant 7-bit group first.
  uint64_t val;
} upb_decoderet;

// Four functions for decoding a varint of at most eight bytes.  They are all
// functionally identical, but are implemented in different ways and likely have
// different performance profiles.  We keep them around for performance testing.
//
// The argument carries the state of a decode already in progress: the
// upb_vdecode_check2_* wrappers call these with r.p pointing past the two
// bytes they have consumed and r.val holding those bytes' 14 payload bits.
// NOTE(review): the implementations are not in this header; presumably the
// result's p is NULL for an unterminated varint (see upb_decoderet) -- confirm.
//
// Note that these functions may not read byte-by-byte, so they must not be used
// unless there are at least eight bytes left in the buffer!
upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r);
upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r);
upb_decoderet upb_vdecode_max8_wright(upb_decoderet r);
upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r);

// Template for a function that checks the first two bytes with branching
// and dispatches longer varints (3-10 bytes) to a separate function.  One- and
// two-byte varints are fully decoded inline; otherwise the 14 payload bits
// read so far and the position past the second byte are handed to
// decode_max8_function.  Note that this may read up to 10 bytes, so it must
// not be used unless there are at least ten bytes left in the buffer!
//
// The input is only read, so it is viewed through a pointer-to-const.
#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function)                  \
INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) {             \
  const uint8_t *p = (const uint8_t*)_p;                                       \
  if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7fU}; return r; }  \
  upb_decoderet r = {_p + 2, (*p & 0x7fU) | ((*(p + 1) & 0x7fU) << 7)};        \
  if ((*(p + 1) & 0x80) == 0) return r;                                        \
  return decode_max8_function(r);                                              \
}

// One check2 decoder per max8 implementation.  No trailing semicolons: each
// expansion is already a complete function definition, and a stray ';' at
// file scope is not valid ISO C (compilers accept it only as an extension).
UPB_VARINT_DECODER_CHECK2(branch32, upb_vdecode_max8_branch32)
UPB_VARINT_DECODER_CHECK2(branch64, upb_vdecode_max8_branch64)
UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright)
UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino)
#undef UPB_VARINT_DECODER_CHECK2

// Our canonical functions for decoding varints, based on the currently
// favored best-performing implementations.
//
// upb_vdecode_fast forwards to the massimino check2 decoder when long is
// eight bytes wide and to the branch32 check2 decoder otherwise.  Like the
// check2 decoders it forwards to, it must not be used unless at least ten
// bytes are left in the buffer.
// NOTE(review): sizeof(long) is 4 on LLP64 targets (e.g. 64-bit Windows), so
// those take the branch32 path -- confirm that is the intent.
INLINE upb_decoderet upb_vdecode_fast(const char *p) {
  return (sizeof(long) == 8) ? upb_vdecode_check2_massimino(p)
                             : upb_vdecode_check2_branch32(p);
}

// Canonical "max8" decoder: continues the partially decoded varint in r using
// the massimino implementation and returns its result unchanged.
INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) {
  upb_decoderet decoded = upb_vdecode_max8_massimino(r);
  return decoded;
}


/* Encoding *******************************************************************/

// Returns the number of 8-bit bytes needed to hold val: the 0-based index of
// its highest set bit divided by 8, plus one.  Zero is reported as one byte.
// Note this counts whole bytes, not 7-bit varint groups.
INLINE int upb_value_size(uint64_t val) {
  // Handle zero up front: it has no highest set bit, and the result of
  // __builtin_clzll(0) is undefined, so it must never be evaluated.
  if (val == 0) return 1;
#ifdef __GNUC__
  int high_bit = 63 - __builtin_clzll(val);  // 0-based index of top set bit.
#else
  int high_bit = 0;
  uint64_t tmp = val;
  while ((tmp >>= 1) != 0) high_bit++;
#endif
  return high_bit / 8 + 1;
}

// Encodes a 64-bit varint into buf (which must be >=UPB_PB_VARINT_MAX_LEN
// bytes long), returning how many bytes were used.
//
// The value is emitted seven bits at a time, least-significant group first;
// every byte except the last has its high (continuation) bit set.  A value of
// zero is encoded as a single zero byte.
//
// TODO: benchmark and optimize if necessary.
INLINE size_t upb_vencode64(uint64_t val, char *buf) {
  size_t len = 0;
  // do/while so that zero still produces exactly one output byte.
  do {
    uint8_t group = val & 0x7fU;
    val >>= 7;
    if (val != 0) group |= 0x80U;  // More groups follow.
    buf[len++] = group;
  } while (val != 0);
  return len;
}

// Encodes a 32-bit varint, *not* sign-extended.
//
// The value is varint-encoded into a scratch buffer and the resulting bytes
// (at most five) are copied into the low-address bytes of the returned
// uint64_t; the remaining bytes of the result are zero.
// NOTE(review): the second assertion reads the packed bytes as an integer, so
// it appears to assume a little-endian host -- confirm.
INLINE uint64_t upb_vencode32(uint32_t val) {
  uint64_t packed = 0;
  char scratch[UPB_PB_VARINT_MAX_LEN];
  const size_t len = upb_vencode64(val, scratch);
  assert(len <= 5);
  memcpy(&packed, scratch, len);
  assert(packed <= 0xffffffffffU);
  return packed;
}

#ifdef __cplusplus
}  /* extern "C" */
#endif

#endif  /* UPB_VARINT_DECODER_H_ */
Split varint decoders into separate .h file. This makes it easier to benchmark and test the multiple possible implementations of varint decoding. 14 years ago			`/*`
			`* upb - a minimalist implementation of protocol buffers.`
			`*`
Update copyright to be Google Inc. This doesn't reflect any material change in how I will be working on upb, and I have no problem making this change. It's still open source under the BSD license, and I'll still be working on it well beyond the hours that constitute a normal job. 14 years ago			`* Copyright (c) 2011 Google Inc. See LICENSE for details.`
			`* Author: Josh Haberman <jhaberman@gmail.com>`
			`*`
Refactor varint encoding/decoding. 14 years ago			`* A number of routines for varint manipulation (we keep them all around to`
			`* have multiple approaches available for benchmarking).`
Split varint decoders into separate .h file. This makes it easier to benchmark and test the multiple possible implementations of varint decoding. 14 years ago			`*/`

			`#ifndef UPB_VARINT_DECODER_H_`
			`#define UPB_VARINT_DECODER_H_`

			`#include <stdint.h>`
			`#include <string.h>`
Directory restructure. Includes are now via upb/foo.h. Files specific to the protobuf format are now in upb/pb (the core library is concerned with message definitions, handlers, and byte streams, but knows nothing about any particular serialization format). 14 years ago			`#include "upb/upb.h"`
Split varint decoders into separate .h file. This makes it easier to benchmark and test the multiple possible implementations of varint decoding. 14 years ago
			`#ifdef __cplusplus`
			`extern "C" {`
			`#endif`

Sync from internal Google development. Many improvements, too many to mention. One significant perf regression warrants investigation: omitfp.parsetoproto2_googlemessage1.upb_jit: 343 -> 252 (-26.53) plain.parsetoproto2_googlemessage1.upb_jit: 334 -> 251 (-24.85) 25% regression for this benchmark is bad, but since I don't think there's any fundamental design issue that caused it I'm going to go ahead with the commit anyway. Can investigate and fix later. Other benchmarks were neutral or showed slight improvement. 13 years ago			`// A list of types as they are encoded on-the-wire.`
			`typedef enum {`
			`UPB_WIRE_TYPE_VARINT = 0,`
			`UPB_WIRE_TYPE_64BIT = 1,`
			`UPB_WIRE_TYPE_DELIMITED = 2,`
			`UPB_WIRE_TYPE_START_GROUP = 3,`
			`UPB_WIRE_TYPE_END_GROUP = 4,`
			`UPB_WIRE_TYPE_32BIT = 5,`
			`} upb_wiretype_t;`

Sync with internal Google development. This breaks the open-source build, will follow up with a change to fix it. 13 years ago			`// The maximum number of bytes that it takes to encode a 64-bit varint.`
			`// Note that with a better encoding this could be 9 (TODO: write up a`
			`// wiki document about this).`
			`#define UPB_PB_VARINT_MAX_LEN 10`

			`/* Zig-zag encoding/decoding **************************************************/`

			`INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }`
			`INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }`
			`INLINE uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); }`
			`INLINE uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); }`

Refactor varint encoding/decoding. 14 years ago			`/* Decoding *******************************************************************/`

Split varint decoders into separate .h file. This makes it easier to benchmark and test the multiple possible implementations of varint decoding. 14 years ago			`// All decoding functions return this struct by value.`
			`typedef struct {`
			`const char *p; // NULL if the varint was unterminated.`
			`uint64_t val;`
			`} upb_decoderet;`

Sync with 8 months of Google-internal development. Many things have changed and been simplified. The memory-management story for upb_def and upb_handlers is much more robust; upb_def and upb_handlers should be fairly stable interfaces now. There is still much work to do for the runtime component (upb_sink). 12 years ago			`// Four functions for decoding a varint of at most eight bytes. They are all`
			`// functionally identical, but are implemented in different ways and likely have`
			`// different performance profiles. We keep them around for performance testing.`
			`//`
			`// Note that these functions may not read byte-by-byte, so they must not be used`
			`// unless there are at least eight bytes left in the buffer!`
			`upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r);`
			`upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r);`
Decoder redesign in preparation for packed fields and start/endseq. 14 years ago			`upb_decoderet upb_vdecode_max8_wright(upb_decoderet r);`
			`upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r);`
Split varint decoders into separate .h file. This makes it easier to benchmark and test the multiple possible implementations of varint decoding. 14 years ago
First rough version of the JIT. It can successfully parse SpeedMessage1. Preliminary results: 750MB/s on Core2 2.4GHz. This number is 2.5x proto2. This isn't apples-to-apples, because proto2 is parsing to a struct and we are just doing stream parsing, but for apps that are currently using proto2, this is the improvement they would see if they could move to stream-based processing. Unfortunately perf-regression-test.py is broken, and I'm not 100% sure why. It would be nice to fix it first (to ensure that there are no performance regressions for the table-based decoder) but I'm really impatient to get the JIT checked in. 14 years ago			`// Template for a function that checks the first two bytes with branching`
Sync with 8 months of Google-internal development. Many things have changed and been simplified. The memory-management story for upb_def and upb_handlers is much more robust; upb_def and upb_handlers should be fairly stable interfaces now. There is still much work to do for the runtime component (upb_sink). 12 years ago			`// and dispatches 2-10 bytes with a separate function. Note that this may read`
			`// up to 10 bytes, so it must not be used unless there are at least ten bytes`
			`// left in the buffer!`
			`#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \`
			`INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) { \`
			`uint8_t *p = (uint8_t*)_p; \`
			`if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7fU}; return r; } \`
			`upb_decoderet r = {_p + 2, (*p & 0x7fU) | ((*(p + 1) & 0x7fU) << 7)}; \`
			`if ((*(p + 1) & 0x80) == 0) return r; \`
			`return decode_max8_function(r); \`
First rough version of the JIT. It can successfully parse SpeedMessage1. Preliminary results: 750MB/s on Core2 2.4GHz. This number is 2.5x proto2. This isn't apples-to-apples, because proto2 is parsing to a struct and we are just doing stream parsing, but for apps that are currently using proto2, this is the improvement they would see if they could move to stream-based processing. Unfortunately perf-regression-test.py is broken, and I'm not 100% sure why. It would be nice to fix it first (to ensure that there are no performance regressions for the table-based decoder) but I'm really impatient to get the JIT checked in. 14 years ago			`}`

Sync with 8 months of Google-internal development. Many things have changed and been simplified. The memory-management story for upb_def and upb_handlers is much more robust; upb_def and upb_handlers should be fairly stable interfaces now. There is still much work to do for the runtime component (upb_sink). 12 years ago			`UPB_VARINT_DECODER_CHECK2(branch32, upb_vdecode_max8_branch32);`
			`UPB_VARINT_DECODER_CHECK2(branch64, upb_vdecode_max8_branch64);`
First rough version of the JIT. It can successfully parse SpeedMessage1. Preliminary results: 750MB/s on Core2 2.4GHz. This number is 2.5x proto2. This isn't apples-to-apples, because proto2 is parsing to a struct and we are just doing stream parsing, but for apps that are currently using proto2, this is the improvement they would see if they could move to stream-based processing. Unfortunately perf-regression-test.py is broken, and I'm not 100% sure why. It would be nice to fix it first (to ensure that there are no performance regressions for the table-based decoder) but I'm really impatient to get the JIT checked in. 14 years ago			`UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright);`
			`UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino);`
			`#undef UPB_VARINT_DECODER_CHECK2`

			`// Our canonical functions for decoding varints, based on the currently`
			`// favored best-performing implementations.`
			`INLINE upb_decoderet upb_vdecode_fast(const char *p) {`
Switch to non-branching varint decoder. 14 years ago			`if (sizeof(long) == 8)`
First rough version of the JIT. It can successfully parse SpeedMessage1. Preliminary results: 750MB/s on Core2 2.4GHz. This number is 2.5x proto2. This isn't apples-to-apples, because proto2 is parsing to a struct and we are just doing stream parsing, but for apps that are currently using proto2, this is the improvement they would see if they could move to stream-based processing. Unfortunately perf-regression-test.py is broken, and I'm not 100% sure why. It would be nice to fix it first (to ensure that there are no performance regressions for the table-based decoder) but I'm really impatient to get the JIT checked in. 14 years ago			`return upb_vdecode_check2_massimino(p);`
Switch to non-branching varint decoder. 14 years ago			`else`
Sync with 8 months of Google-internal development. Many things have changed and been simplified. The memory-management story for upb_def and upb_handlers is much more robust; upb_def and upb_handlers should be fairly stable interfaces now. There is still much work to do for the runtime component (upb_sink). 12 years ago			`return upb_vdecode_check2_branch32(p);`
First rough version of the JIT. It can successfully parse SpeedMessage1. Preliminary results: 750MB/s on Core2 2.4GHz. This number is 2.5x proto2. This isn't apples-to-apples, because proto2 is parsing to a struct and we are just doing stream parsing, but for apps that are currently using proto2, this is the improvement they would see if they could move to stream-based processing. Unfortunately perf-regression-test.py is broken, and I'm not 100% sure why. It would be nice to fix it first (to ensure that there are no performance regressions for the table-based decoder) but I'm really impatient to get the JIT checked in. 14 years ago			`}`

			`INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) {`
			`return upb_vdecode_max8_massimino(r);`
Switch to non-branching varint decoder. 14 years ago			`}`
Split varint decoders into separate .h file. This makes it easier to benchmark and test the multiple possible implementations of varint decoding. 14 years ago
Refactor varint encoding/decoding. 14 years ago
			`/* Encoding *******************************************************************/`

Refinement of upb_bytesrc interface. Added a upb_byteregion that tracks a region of the input buffer; decoders use this instead of using a upb_bytesrc directly. upb_byteregion is also used as the way of passing a string to a upb_handlers callback. This symmetry makes decoders compose better; if you want to take a parsed string and decode it as something else, you can take the string directly from the callback and feed it as input to another parser. A commented-out version of a pinning interface is present; I decline to actually implement it (and accept its extra complexity) until/unless it is clear that it is actually a win. But it is included as a proof-of-concept, to show that it fits well with the existing interface. 13 years ago			`INLINE int upb_value_size(uint64_t val) {`
Refactor varint encoding/decoding. 14 years ago			`#ifdef __GNUC__`
			`int high_bit = 63 - __builtin_clzll(val); // 0-based, undef if val == 0.`
			`#else`
			`int high_bit = 0;`
			`uint64_t tmp = val;`
			`while(tmp >>= 1) high_bit++;`
			`#endif`
			`return val == 0 ? 1 : high_bit / 8 + 1;`
			`}`

Sync with internal Google development. This breaks the open-source build, will follow up with a change to fix it. 13 years ago			`// Encodes a 64-bit varint into buf (which must be >=UPB_PB_VARINT_MAX_LEN`
			`// bytes long), returning how many bytes were used.`
			`//`
			`// TODO: benchmark and optimize if necessary.`
			`INLINE size_t upb_vencode64(uint64_t val, char *buf) {`
			`if (val == 0) { buf[0] = 0; return 1; }`
			`size_t i = 0;`
			`while (val) {`
Sync with 8 months of Google-internal development. Many things have changed and been simplified. The memory-management story for upb_def and upb_handlers is much more robust; upb_def and upb_handlers should be fairly stable interfaces now. There is still much work to do for the runtime component (upb_sink). 12 years ago			`uint8_t byte = val & 0x7fU;`
Sync with internal Google development. This breaks the open-source build, will follow up with a change to fix it. 13 years ago			`val >>= 7;`
Sync with 8 months of Google-internal development. Many things have changed and been simplified. The memory-management story for upb_def and upb_handlers is much more robust; upb_def and upb_handlers should be fairly stable interfaces now. There is still much work to do for the runtime component (upb_sink). 12 years ago			`if (val) byte |= 0x80U;`
Sync with internal Google development. This breaks the open-source build, will follow up with a change to fix it. 13 years ago			`buf[i++] = byte;`
			`}`
			`return i;`
			`}`

Decoder redesign in preparation for packed fields and start/endseq. 14 years ago			`// Encodes a 32-bit varint, not sign-extended.`
			`INLINE uint64_t upb_vencode32(uint32_t val) {`
Sync with internal Google development. This breaks the open-source build, will follow up with a change to fix it. 13 years ago			`char buf[UPB_PB_VARINT_MAX_LEN];`
			`size_t bytes = upb_vencode64(val, buf);`
Refactor varint encoding/decoding. 14 years ago			`uint64_t ret = 0;`
Sync with internal Google development. This breaks the open-source build, will follow up with a change to fix it. 13 years ago			`assert(bytes <= 5);`
			`memcpy(&ret, buf, bytes);`
Sync with 8 months of Google-internal development. Many things have changed and been simplified. The memory-management story for upb_def and upb_handlers is much more robust; upb_def and upb_handlers should be fairly stable interfaces now. There is still much work to do for the runtime component (upb_sink). 12 years ago			`assert(ret <= 0xffffffffffU);`
Refactor varint encoding/decoding. 14 years ago			`return ret;`
			`}`

Split varint decoders into separate .h file. This makes it easier to benchmark and test the multiple possible implementations of varint decoding. 14 years ago			`#ifdef __cplusplus`
			`} /* extern "C" */`
			`#endif`

			`#endif /* UPB_VARINT_DECODER_H_ */`