protobuf/upb/pb/varint.int.h

/*
 * upb - a minimalist implementation of protocol buffers.
 *
 * Copyright (c) 2011 Google Inc.  See LICENSE for details.
 * Author: Josh Haberman <jhaberman@gmail.com>
 *
 * A number of routines for varint manipulation (we keep them all around to
 * have multiple approaches available for benchmarking).
 */

#ifndef UPB_VARINT_DECODER_H_
#define UPB_VARINT_DECODER_H_

#include <assert.h>
#include <stdint.h>
#include <string.h>
#include "upb/upb.h"

#ifdef __cplusplus
extern "C" {
#endif

// A list of types as they are encoded on-the-wire.  Every enumerator is given
// an explicit numeric value (0 through 5) rather than relying on implicit
// enumeration order.
typedef enum {
  UPB_WIRE_TYPE_VARINT      = 0,
  UPB_WIRE_TYPE_64BIT       = 1,
  UPB_WIRE_TYPE_DELIMITED   = 2,
  UPB_WIRE_TYPE_START_GROUP = 3,
  UPB_WIRE_TYPE_END_GROUP   = 4,
  UPB_WIRE_TYPE_32BIT       = 5,
} upb_wiretype_t;

// Largest numeric value used by any upb_wiretype_t enumerator
// (UPB_WIRE_TYPE_32BIT == 5).  Must be kept in sync with the enum by hand.
#define UPB_MAX_WIRE_TYPE 5

// The maximum number of bytes that it takes to encode a 64-bit varint.  Each
// encoded byte carries 7 bits of the value (see upb_vencode64), so 64 bits
// need ceil(64 / 7) == 10 bytes.
// Note that with a better encoding this could be 9 (TODO: write up a
// wiki document about this).
#define UPB_PB_VARINT_MAX_LEN 10

// Array of the "native" (i.e. non-packed-repeated) wire type for a given
// descriptor type (upb_descriptortype_t).  This header only declares the
// array; its definition and length are not visible here.
extern const uint8_t upb_pb_native_wire_types[];

/* Zig-zag encoding/decoding **************************************************/

// Zig-zag decodes a 32-bit value: the low bit of n selects the sign and the
// remaining 31 bits hold the magnitude.  Even inputs map to non-negative
// results (n / 2); odd inputs map to negative results (-(n + 1) / 2).
UPB_INLINE int32_t upb_zzdec_32(uint32_t n) {
  const uint32_t magnitude = n >> 1;
  // All-ones when the sign bit (bit 0 of n) is set, zero otherwise.
  const uint32_t sign_mask = (n & 1) ? UINT32_MAX : 0;
  return magnitude ^ sign_mask;
}
// Zig-zag decodes a 64-bit value: the low bit of n selects the sign and the
// remaining 63 bits hold the magnitude.  Even inputs map to non-negative
// results (n / 2); odd inputs map to negative results (-(n + 1) / 2).
UPB_INLINE int64_t upb_zzdec_64(uint64_t n) {
  const uint64_t magnitude = n >> 1;
  // All-ones when the sign bit (bit 0 of n) is set, zero otherwise.
  const uint64_t sign_mask = (n & 1) ? UINT64_MAX : 0;
  return magnitude ^ sign_mask;
}
// Zig-zag encodes a signed 32-bit value so that values of small magnitude,
// positive or negative, map to small unsigned values:
//   0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
// This is the inverse of upb_zzdec_32().
UPB_INLINE uint32_t upb_zzenc_32(int32_t n) {
  // Work on the unsigned bit pattern: left-shifting a negative signed value
  // (or shifting a set bit into the sign position) is undefined behavior in C,
  // and right-shifting a negative value is implementation-defined.
  const uint32_t bits = (uint32_t)n;
  // All-ones when n is negative, zero otherwise.
  const uint32_t sign_mask = (uint32_t)0 - (bits >> 31);
  return (bits << 1) ^ sign_mask;
}
// Zig-zag encodes a signed 64-bit value so that values of small magnitude,
// positive or negative, map to small unsigned values:
//   0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
// This is the inverse of upb_zzdec_64().
UPB_INLINE uint64_t upb_zzenc_64(int64_t n) {
  // Work on the unsigned bit pattern: left-shifting a negative signed value
  // (or shifting a set bit into the sign position) is undefined behavior in C,
  // and right-shifting a negative value is implementation-defined.
  const uint64_t bits = (uint64_t)n;
  // All-ones when n is negative, zero otherwise.
  const uint64_t sign_mask = (uint64_t)0 - (bits >> 63);
  return (bits << 1) ^ sign_mask;
}

/* Decoding *******************************************************************/

// All decoding functions return this struct by value.  The max8 decoders also
// take one as their argument: the check2 wrappers hand them a partially
// decoded result (p already advanced past two bytes, val holding the low 14
// bits) to continue from.
typedef struct {
  const char *p;  // NULL if the varint was unterminated.
  uint64_t val;   // Decoded varint value.
} upb_decoderet;

// Four functions for decoding a varint of at most eight bytes.  They are all
// functionally identical, but are implemented in different ways and likely have
// different performance profiles.  We keep them around for performance testing.
//
// Each takes a upb_decoderet and returns one by value; they are only declared
// here, not defined.
//
// Note that these functions may not read byte-by-byte, so they must not be used
// unless there are at least eight bytes left in the buffer!
upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r);
upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r);
upb_decoderet upb_vdecode_max8_wright(upb_decoderet r);
upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r);

// Template for a function that checks the first two bytes with branching
// and dispatches 2-10 bytes with a separate function.  Note that this may read
// up to 10 bytes, so it must not be used unless there are at least ten bytes
// left in the buffer!
//
// Expands to the definition of upb_vdecode_check2_<name>(const char *_p):
//   - one-byte varint (high bit of byte 0 clear): returns {_p + 1, 7-bit value}
//   - two-byte varint (high bit of byte 1 clear): returns {_p + 2, 14-bit value}
//   - otherwise: passes {_p + 2, low 14 bits} to decode_max8_function and
//     returns whatever it returns.
//
// The input is only ever read, so the byte pointer keeps the const qualifier
// of _p instead of casting it away.
#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function)                  \
UPB_INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) {         \
  const uint8_t *p = (const uint8_t*)_p;                                       \
  if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7fU}; return r; }  \
  upb_decoderet r = {_p + 2, (*p & 0x7fU) | ((*(p + 1) & 0x7fU) << 7)};        \
  if ((*(p + 1) & 0x80) == 0) return r;                                        \
  return decode_max8_function(r);                                              \
}

// Instantiate one upb_vdecode_check2_<name>() wrapper per max8 decoder
// (branch32, branch64, wright, massimino), then retire the template macro so
// it cannot be used outside this header.
UPB_VARINT_DECODER_CHECK2(branch32, upb_vdecode_max8_branch32);
UPB_VARINT_DECODER_CHECK2(branch64, upb_vdecode_max8_branch64);
UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright);
UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino);
#undef UPB_VARINT_DECODER_CHECK2

// Our canonical functions for decoding varints, based on the currently
// favored best-performing implementations.
//
// upb_vdecode_fast() decodes the varint starting at p, choosing the 64-bit
// branching variant when long is eight bytes wide and the 32-bit variant
// otherwise.  The selector is a compile-time constant expression.
UPB_INLINE upb_decoderet upb_vdecode_fast(const char *p) {
  return (sizeof(long) == 8) ? upb_vdecode_check2_branch64(p)
                             : upb_vdecode_check2_branch32(p);
}

// Canonical "at most eight bytes" decoder: currently forwards to the
// massimino implementation and returns its result unchanged.
UPB_INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) {
  const upb_decoderet decoded = upb_vdecode_max8_massimino(r);
  return decoded;
}


/* Encoding *******************************************************************/

// Returns the number of whole bytes needed to hold the significant bits of
// val, i.e. (index of the highest set bit) / 8 + 1.  Zero is reported as
// occupying one byte.  Note this is a plain byte count, not a varint length.
UPB_INLINE int upb_value_size(uint64_t val) {
  // Handle zero before touching the builtin: __builtin_clzll(0) has an
  // undefined result, so it must never be evaluated for val == 0.
  if (val == 0) return 1;
#ifdef __GNUC__
  int high_bit = 63 - __builtin_clzll(val);  // 0-based index of top set bit.
#else
  // Portable fallback: count how many times val can be halved.
  int high_bit = 0;
  uint64_t tmp = val;
  while (tmp >>= 1) high_bit++;
#endif
  return high_bit / 8 + 1;
}

// Encodes a 64-bit varint into buf (which must be >=UPB_PB_VARINT_MAX_LEN
// bytes long), returning how many bytes were used.
//
// The value is emitted least-significant 7-bit group first; every byte except
// the last has its high bit (0x80) set as a continuation marker.  Zero encodes
// as the single byte 0x00.
//
// TODO: benchmark and optimize if necessary.
UPB_INLINE size_t upb_vencode64(uint64_t val, char *buf) {
  size_t len = 0;
  // do/while so that val == 0 still emits exactly one byte.
  do {
    uint8_t chunk = val & 0x7fU;
    val >>= 7;
    if (val != 0) chunk |= 0x80U;  // More groups follow.
    buf[len++] = chunk;
  } while (val != 0);
  return len;
}

// Returns the number of bytes the varint encoding of val occupies.  The value
// is actually encoded into a scratch buffer and the bytes are thrown away;
// only the length reported by upb_vencode64() is kept.
UPB_INLINE size_t upb_varint_size(uint64_t val) {
  char scratch[UPB_PB_VARINT_MAX_LEN];
  const size_t encoded_len = upb_vencode64(val, scratch);
  return encoded_len;
}

// Encodes a 32-bit varint, *not* sign-extended.
//
// The encoded bytes (at most five for a 32-bit input) are copied, in memory
// order, into the first bytes of a zero-initialized uint64_t, which is
// returned.  Because the copy is a raw memcpy, the numeric value of the result
// depends on host byte order; the second assertion holds as written on a
// little-endian host.
UPB_INLINE uint64_t upb_vencode32(uint32_t val) {
  char encoded[UPB_PB_VARINT_MAX_LEN];
  uint64_t packed = 0;
  const size_t len = upb_vencode64(val, encoded);
  assert(len <= 5);
  memcpy(&packed, encoded, len);
  assert(packed <= 0xffffffffffU);
  return packed;
}

#ifdef __cplusplus
}  /* extern "C" */
#endif

#endif  /* UPB_VARINT_DECODER_H_ */
Split varint decoders into separate .h file. This makes it easier to benchmark and test the multiple possible implementations of varint decoding. 14 years ago			`/*`
			`* upb - a minimalist implementation of protocol buffers.`
			`*`
Update copyright to be Google Inc. This doesn't reflect any material change in how I will be working on upb, and I have no problem making this change. It's still open source under the BSD license, and I'll still be working on it well beyond the hours that constitute a normal job. 14 years ago			`* Copyright (c) 2011 Google Inc. See LICENSE for details.`
			`* Author: Josh Haberman <jhaberman@gmail.com>`
			`*`
Refactor varint encoding/decoding. 14 years ago			`* A number of routines for varint manipulation (we keep them all around to`
			`* have multiple approaches available for benchmarking).`
Split varint decoders into separate .h file. This makes it easier to benchmark and test the multiple possible implementations of varint decoding. 14 years ago			`*/`

			`#ifndef UPB_VARINT_DECODER_H_`
			`#define UPB_VARINT_DECODER_H_`

Merge from Google-internal development: - rewritten decoder; interpreted decoder is bytecode-based, JIT decoder no longer falls back to the interpreter. - C++ improvements: C++11-compatible iterators, upb::reffed_ptr for RAII refcounting, better upcast/downcast support. - removed the gross upb_value abstraction from public upb.h. 11 years ago			`#include <assert.h>`
Split varint decoders into separate .h file. This makes it easier to benchmark and test the multiple possible implementations of varint decoding. 14 years ago			`#include <stdint.h>`
			`#include <string.h>`
Directory restructure. Includes are now via upb/foo.h. Files specific to the protobuf format are now in upb/pb (the core library is concerned with message definitions, handlers, and byte streams, but knows nothing about any particular serializationf format). 14 years ago			`#include "upb/upb.h"`
Split varint decoders into separate .h file. This makes it easier to benchmark and test the multiple possible implementations of varint decoding. 14 years ago
			`#ifdef __cplusplus`
			`extern "C" {`
			`#endif`

Sync from internal Google development. Many improvements, too many to mention. One significant perf regression warrants investigation: omitfp.parsetoproto2_googlemessage1.upb_jit: 343 -> 252 (-26.53) plain.parsetoproto2_googlemessage1.upb_jit: 334 -> 251 (-24.85) 25% regression for this benchmark is bad, but since I don't think there's any fundamental design issue that caused it I'm going to go ahead with the commit anyway. Can investigate and fix later. Other benchmarks were neutral or showed slight improvement. 13 years ago			`// A list of types as they are encoded on-the-wire.`
			`typedef enum {`
			`UPB_WIRE_TYPE_VARINT = 0,`
			`UPB_WIRE_TYPE_64BIT = 1,`
			`UPB_WIRE_TYPE_DELIMITED = 2,`
			`UPB_WIRE_TYPE_START_GROUP = 3,`
			`UPB_WIRE_TYPE_END_GROUP = 4,`
			`UPB_WIRE_TYPE_32BIT = 5,`
			`} upb_wiretype_t;`

Merge from Google-internal development: - rewritten decoder; interpreted decoder is bytecode-based, JIT decoder no longer falls back to the interpreter. - C++ improvements: C++11-compatible iterators, upb::reffed_ptr for RAII refcounting, better upcast/downcast support. - removed the gross upb_value abstraction from public upb.h. 11 years ago			`#define UPB_MAX_WIRE_TYPE 5`

Sync with internal Google development. This breaks the open-source build, will follow up with a change to fix it. 13 years ago			`// The maximum number of bytes that it takes to encode a 64-bit varint.`
			`// Note that with a better encoding this could be 9 (TODO: write up a`
			`// wiki document about this).`
			`#define UPB_PB_VARINT_MAX_LEN 10`

Sync to Google-internal development. 10 years ago			`// Array of the "native" (ie. non-packed-repeated) wire type for the given a`
			`// descriptor type (upb_descriptortype_t).`
			`extern const uint8_t upb_pb_native_wire_types[];`

Sync with internal Google development. This breaks the open-source build, will follow up with a change to fix it. 13 years ago			`/* Zig-zag encoding/decoding **************************************************/`

Synced with 3 months of Google-internal development. Major changes: - Got rid of all bytestream interfaces in favor of using regular handlers. - new Pipeline object represents a upb pipeline, does bump allocation internally to manage memory. - proto2 support now can handle extensions. 12 years ago			`UPB_INLINE int32_t upb_zzdec_32(uint32_t n) {`
			`return (n >> 1) ^ -(int32_t)(n & 1);`
			`}`
			`UPB_INLINE int64_t upb_zzdec_64(uint64_t n) {`
			`return (n >> 1) ^ -(int64_t)(n & 1);`
			`}`
			`UPB_INLINE uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); }`
			`UPB_INLINE uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); }`
Sync with internal Google development. This breaks the open-source build, will follow up with a change to fix it. 13 years ago
Refactor varint encoding/decoding. 14 years ago			`/* Decoding *******************************************************************/`

Split varint decoders into separate .h file. This makes it easier to benchmark and test the multiple possible implementations of varint decoding. 14 years ago			`// All decoding functions return this struct by value.`
			`typedef struct {`
			`const char *p; // NULL if the varint was unterminated.`
			`uint64_t val;`
			`} upb_decoderet;`

Sync with 8 months of Google-internal development. Many things have changed and been simplified. The memory-management story for upb_def and upb_handlers is much more robust; upb_def and upb_handlers should be fairly stable interfaces now. There is still much work to do for the runtime component (upb_sink). 12 years ago			`// Four functions for decoding a varint of at most eight bytes. They are all`
			`// functionally identical, but are implemented in different ways and likely have`
			`// different performance profiles. We keep them around for performance testing.`
			`//`
			`// Note that these functions may not read byte-by-byte, so they must not be used`
			`// unless there are at least eight bytes left in the buffer!`
			`upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r);`
			`upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r);`
Decoder redesign in preparation for packed fields and start/endseq. 14 years ago			`upb_decoderet upb_vdecode_max8_wright(upb_decoderet r);`
			`upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r);`
Split varint decoders into separate .h file. This makes it easier to benchmark and test the multiple possible implementations of varint decoding. 14 years ago
First rough version of the JIT. It can successfully parse SpeedMessage1. Preliminary results: 750MB/s on Core2 2.4GHz. This number is 2.5x proto2. This isn't apples-to-apples, because proto2 is parsing to a struct and we are just doing stream parsing, but for apps that are currently using proto2, this is the improvement they would see if they could move to stream-based processing. Unfortunately perf-regression-test.py is broken, and I'm not 100% sure why. It would be nice to fix it first (to ensure that there are no performance regressions for the table-based decoder) but I'm really impatient to get the JIT checked in. 14 years ago			`// Template for a function that checks the first two bytes with branching`
Sync with 8 months of Google-internal development. Many things have changed and been simplified. The memory-management story for upb_def and upb_handlers is much more robust; upb_def and upb_handlers should be fairly stable interfaces now. There is still much work to do for the runtime component (upb_sink). 12 years ago			`// and dispatches 2-10 bytes with a separate function. Note that this may read`
			`// up to 10 bytes, so it must not be used unless there are at least ten bytes`
			`// left in the buffer!`
			`#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \`
Synced with 3 months of Google-internal development. Major changes: - Got rid of all bytestream interfaces in favor of using regular handlers. - new Pipeline object represents a upb pipeline, does bump allocation internally to manage memory. - proto2 support now can handle extensions. 12 years ago			`UPB_INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) { \`
Sync with 8 months of Google-internal development. Many things have changed and been simplified. The memory-management story for upb_def and upb_handlers is much more robust; upb_def and upb_handlers should be fairly stable interfaces now. There is still much work to do for the runtime component (upb_sink). 12 years ago			`uint8_t *p = (uint8_t*)_p; \`
			`if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7fU}; return r; } \`
			`upb_decoderet r = {_p + 2, (*p & 0x7fU) \| ((*(p + 1) & 0x7fU) << 7)}; \`
			`if ((*(p + 1) & 0x80) == 0) return r; \`
			`return decode_max8_function(r); \`
First rough version of the JIT. It can successfully parse SpeedMessage1. Preliminary results: 750MB/s on Core2 2.4GHz. This number is 2.5x proto2. This isn't apples-to-apples, because proto2 is parsing to a struct and we are just doing stream parsing, but for apps that are currently using proto2, this is the improvement they would see if they could move to stream-based processing. Unfortunately perf-regression-test.py is broken, and I'm not 100% sure why. It would be nice to fix it first (to ensure that there are no performance regressions for the table-based decoder) but I'm really impatient to get the JIT checked in. 14 years ago			`}`

Sync with 8 months of Google-internal development. Many things have changed and been simplified. The memory-management story for upb_def and upb_handlers is much more robust; upb_def and upb_handlers should be fairly stable interfaces now. There is still much work to do for the runtime component (upb_sink). 12 years ago			`UPB_VARINT_DECODER_CHECK2(branch32, upb_vdecode_max8_branch32);`
			`UPB_VARINT_DECODER_CHECK2(branch64, upb_vdecode_max8_branch64);`
First rough version of the JIT. It can successfully parse SpeedMessage1. Preliminary results: 750MB/s on Core2 2.4GHz. This number is 2.5x proto2. This isn't apples-to-apples, because proto2 is parsing to a struct and we are just doing stream parsing, but for apps that are currently using proto2, this is the improvement they would see if they could move to stream-based processing. Unfortunately perf-regression-test.py is broken, and I'm not 100% sure why. It would be nice to fix it first (to ensure that there are no performance regressions for the table-based decoder) but I'm really impatient to get the JIT checked in. 14 years ago			`UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright);`
			`UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino);`
			`#undef UPB_VARINT_DECODER_CHECK2`

			`// Our canonical functions for decoding varints, based on the currently`
			`// favored best-performing implementations.`
Synced with 3 months of Google-internal development. Major changes: - Got rid of all bytestream interfaces in favor of using regular handlers. - new Pipeline object represents a upb pipeline, does bump allocation internally to manage memory. - proto2 support now can handle extensions. 12 years ago			`UPB_INLINE upb_decoderet upb_vdecode_fast(const char *p) {`
Switch to non-branching varint decoder. 14 years ago			`if (sizeof(long) == 8)`
Merge from Google-internal development: - rewritten decoder; interpreted decoder is bytecode-based, JIT decoder no longer falls back to the interpreter. - C++ improvements: C++11-compatible iterators, upb::reffed_ptr for RAII refcounting, better upcast/downcast support. - removed the gross upb_value abstraction from public upb.h. 11 years ago			`return upb_vdecode_check2_branch64(p);`
Switch to non-branching varint decoder. 14 years ago			`else`
Sync with 8 months of Google-internal development. Many things have changed and been simplified. The memory-management story for upb_def and upb_handlers is much more robust; upb_def and upb_handlers should be fairly stable interfaces now. There is still much work to do for the runtime component (upb_sink). 12 years ago			`return upb_vdecode_check2_branch32(p);`
First rough version of the JIT. It can successfully parse SpeedMessage1. Preliminary results: 750MB/s on Core2 2.4GHz. This number is 2.5x proto2. This isn't apples-to-apples, because proto2 is parsing to a struct and we are just doing stream parsing, but for apps that are currently using proto2, this is the improvement they would see if they could move to stream-based processing. Unfortunately perf-regression-test.py is broken, and I'm not 100% sure why. It would be nice to fix it first (to ensure that there are no performance regressions for the table-based decoder) but I'm really impatient to get the JIT checked in. 14 years ago			`}`

Synced with 3 months of Google-internal development. Major changes: - Got rid of all bytestream interfaces in favor of using regular handlers. - new Pipeline object represents a upb pipeline, does bump allocation internally to manage memory. - proto2 support now can handle extensions. 12 years ago			`UPB_INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) {`
First rough version of the JIT. It can successfully parse SpeedMessage1. Preliminary results: 750MB/s on Core2 2.4GHz. This number is 2.5x proto2. This isn't apples-to-apples, because proto2 is parsing to a struct and we are just doing stream parsing, but for apps that are currently using proto2, this is the improvement they would see if they could move to stream-based processing. Unfortunately perf-regression-test.py is broken, and I'm not 100% sure why. It would be nice to fix it first (to ensure that there are no performance regressions for the table-based decoder) but I'm really impatient to get the JIT checked in. 14 years ago			`return upb_vdecode_max8_massimino(r);`
Switch to non-branching varint decoder. 14 years ago			`}`
Split varint decoders into separate .h file. This makes it easier to benchmark and test the multiple possible implementations of varint decoding. 14 years ago
Refactor varint encoding/decoding. 14 years ago
			`/* Encoding *******************************************************************/`

Synced with 3 months of Google-internal development. Major changes: - Got rid of all bytestream interfaces in favor of using regular handlers. - new Pipeline object represents a upb pipeline, does bump allocation internally to manage memory. - proto2 support now can handle extensions. 12 years ago			`UPB_INLINE int upb_value_size(uint64_t val) {`
Refactor varint encoding/decoding. 14 years ago			`#ifdef __GNUC__`
			`int high_bit = 63 - __builtin_clzll(val); // 0-based, undef if val == 0.`
			`#else`
			`int high_bit = 0;`
			`uint64_t tmp = val;`
			`while(tmp >>= 1) high_bit++;`
			`#endif`
			`return val == 0 ? 1 : high_bit / 8 + 1;`
			`}`

Sync with internal Google development. This breaks the open-source build, will follow up with a change to fix it. 13 years ago			`// Encodes a 64-bit varint into buf (which must be >=UPB_PB_VARINT_MAX_LEN`
			`// bytes long), returning how many bytes were used.`
			`//`
			`// TODO: benchmark and optimize if necessary.`
Synced with 3 months of Google-internal development. Major changes: - Got rid of all bytestream interfaces in favor of using regular handlers. - new Pipeline object represents a upb pipeline, does bump allocation internally to manage memory. - proto2 support now can handle extensions. 12 years ago			`UPB_INLINE size_t upb_vencode64(uint64_t val, char *buf) {`
Sync with internal Google development. This breaks the open-source build, will follow up with a change to fix it. 13 years ago			`if (val == 0) { buf[0] = 0; return 1; }`
			`size_t i = 0;`
			`while (val) {`
Sync with 8 months of Google-internal development. Many things have changed and been simplified. The memory-management story for upb_def and upb_handlers is much more robust; upb_def and upb_handlers should be fairly stable interfaces now. There is still much work to do for the runtime component (upb_sink). 12 years ago			`uint8_t byte = val & 0x7fU;`
Sync with internal Google development. This breaks the open-source build, will follow up with a change to fix it. 13 years ago			`val >>= 7;`
Sync with 8 months of Google-internal development. Many things have changed and been simplified. The memory-management story for upb_def and upb_handlers is much more robust; upb_def and upb_handlers should be fairly stable interfaces now. There is still much work to do for the runtime component (upb_sink). 12 years ago			`if (val) byte \|= 0x80U;`
Sync with internal Google development. This breaks the open-source build, will follow up with a change to fix it. 13 years ago			`buf[i++] = byte;`
			`}`
			`return i;`
			`}`

Sync to Google-internal development. 10 years ago			`UPB_INLINE size_t upb_varint_size(uint64_t val) {`
			`char buf[UPB_PB_VARINT_MAX_LEN];`
			`return upb_vencode64(val, buf);`
			`}`

Decoder redesign in preparation for packed fields and start/endseq. 14 years ago			`// Encodes a 32-bit varint, not sign-extended.`
Synced with 3 months of Google-internal development. Major changes: - Got rid of all bytestream interfaces in favor of using regular handlers. - new Pipeline object represents a upb pipeline, does bump allocation internally to manage memory. - proto2 support now can handle extensions. 12 years ago			`UPB_INLINE uint64_t upb_vencode32(uint32_t val) {`
Sync with internal Google development. This breaks the open-source build, will follow up with a change to fix it. 13 years ago			`char buf[UPB_PB_VARINT_MAX_LEN];`
			`size_t bytes = upb_vencode64(val, buf);`
Refactor varint encoding/decoding. 14 years ago			`uint64_t ret = 0;`
Sync with internal Google development. This breaks the open-source build, will follow up with a change to fix it. 13 years ago			`assert(bytes <= 5);`
			`memcpy(&ret, buf, bytes);`
Sync with 8 months of Google-internal development. Many things have changed and been simplified. The memory-management story for upb_def and upb_handlers is much more robust; upb_def and upb_handlers should be fairly stable interfaces now. There is still much work to do for the runtime component (upb_sink). 12 years ago			`assert(ret <= 0xffffffffffU);`
Refactor varint encoding/decoding. 14 years ago			`return ret;`
			`}`

Split varint decoders into separate .h file. This makes it easier to benchmark and test the multiple possible implementations of varint decoding. 14 years ago			`#ifdef __cplusplus`
			`} /* extern "C" */`
			`#endif`

			`#endif /* UPB_VARINT_DECODER_H_ */`