Use the "shldq" decoder for the specialized 64-bit Varint parsers, rather than

using the "RotRight7" decoder.  The "shldq" technique is much faster on recent
Intel and AMD CPUs, when processing larger integers, especially on Zen.

PiperOrigin-RevId: 498078103
Protobuf Team Bot (2 years ago), committed by Copybara-Service
parent b3ec9ec291
commit 0ca97a1d7d
1. src/google/protobuf/generated_message_tctable_impl.h (257)
2. src/google/protobuf/generated_message_tctable_lite.cc (135)
3. src/google/protobuf/generated_message_tctable_lite_test.cc (205)
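
The core of the change: instead of rotating the accumulator right by 7 for
every byte ("RotRight7"), the 64-bit parsers shift each new byte into place
with the vacated low bits filled by ones — which maps to a single x86-64
`shldq` instruction — and combine the partial results with a bitwise AND.
A minimal standalone sketch of that step (not the committed code; the helper
name mirrors the diff's shift_left_fill_with_ones):

    #include <cstdint>
    #include <cstdio>

    // Portable equivalent of one shldq step: shift `byte` left by n * 7 bits
    // and pull the top n * 7 bits of `ones` into the vacated positions.
    template <int n>
    uint64_t ShiftLeftFillWithOnes(uint64_t byte, uint64_t ones) {
      return (byte << (n * 7)) | (ones >> (64 - (n * 7)));
    }

    int main() {
      // Decode the two-byte varint {0x96, 0x01} == 150.
      // Sign-extending the first byte yields 57 high one-bits + the payload.
      int64_t res1 = static_cast<int8_t>('\x96');  // 0xFFFF'FFFF'FFFF'FF96
      uint64_t ones = static_cast<uint64_t>(res1);
      int64_t res2 =
          static_cast<int64_t>(ShiftLeftFillWithOnes<1>(0x01, ones));
      if (res2 >= 0) {  // sign clear: the varint ended with this byte
        std::printf("%llu\n",  // prints 150
                    static_cast<unsigned long long>(res1 & res2));
      }
      return 0;
    }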

src/google/protobuf/generated_message_tctable_impl.h
@@ -259,16 +259,12 @@ enum FieldType : uint16_t {
} // namespace field_layout
#ifndef NDEBUG
template <size_t align>
void AlignFail(uintptr_t address) {
GOOGLE_ABSL_LOG(FATAL) << "Unaligned (" << align << ") access at " << address;
// Explicit abort to let compilers know this function does not return
abort();
}
extern template void AlignFail<4>(uintptr_t);
extern template void AlignFail<8>(uintptr_t);
PROTOBUF_EXPORT void AlignFail(std::integral_constant<size_t, 4>,
std::uintptr_t address);
PROTOBUF_EXPORT void AlignFail(std::integral_constant<size_t, 8>,
std::uintptr_t address);
inline void AlignFail(std::integral_constant<size_t, 1>,
std::uintptr_t address) {}
#endif
// TcParser implements most of the parsing logic for tailcall tables.
@@ -365,29 +361,39 @@ class PROTOBUF_EXPORT TcParser final {
// Manually unrolled and specialized Varint parsing.
template <typename FieldType, int data_offset, int hasbit_idx>
static const char* SpecializedUnrolledVImpl1(PROTOBUF_TC_PARAM_DECL);
static const char* FastTV32S1(PROTOBUF_TC_PARAM_DECL);
template <typename FieldType, int data_offset, int hasbit_idx>
static const char* FastTV64S1(PROTOBUF_TC_PARAM_DECL);
template <int data_offset, int hasbit_idx>
static const char* SpecializedFastV8S1(PROTOBUF_TC_PARAM_DECL);
static const char* FastTV8S1(PROTOBUF_TC_PARAM_DECL);
template <typename FieldType, int data_offset, int hasbit_idx>
static constexpr TailCallParseFunc SingularVarintNoZag1() {
if (data_offset < 100) {
if (sizeof(FieldType) == 1) {
return &SpecializedFastV8S1<data_offset, hasbit_idx>;
if (sizeof(FieldType) == 1) {
if (data_offset < 100) {
return &FastTV8S1<data_offset, hasbit_idx>;
} else {
return &FastV8S1;
}
}
if (sizeof(FieldType) == 4) {
if (data_offset < 100) {
return &FastTV32S1<FieldType, data_offset, hasbit_idx>;
} else {
return &FastV32S1;
}
}
if (sizeof(FieldType) == 8) {
if (data_offset < 128) {
return &FastTV64S1<FieldType, data_offset, hasbit_idx>;
} else {
return &FastV64S1;
}
return &SpecializedUnrolledVImpl1<FieldType, data_offset, hasbit_idx>;
} else if (sizeof(FieldType) == 1) {
return &FastV8S1;
} else if (sizeof(FieldType) == 4) {
return &FastV32S1;
} else if (sizeof(FieldType) == 8) {
return &FastV64S1;
} else {
static_assert(sizeof(FieldType) == 1 || sizeof(FieldType) == 4 ||
sizeof(FieldType) == 8,
"");
return nullptr;
}
static_assert(sizeof(FieldType) == 1 || sizeof(FieldType) == 4 ||
sizeof(FieldType) == 8,
"");
std::abort(); // unreachable
}
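A hypothetical illustration of the selector above: because it is constexpr,
a fast-table entry bakes in the most specialized parser the offsets allow.
The offsets below mirror the test further down, not generated code:

    // Resolves at compile time; 24 < 128, so the shldq-based FastTV64S1
    // specialization is chosen rather than the generic FastV64S1.
    constexpr TailCallParseFunc fn =
        TcParser::SingularVarintNoZag1<uint64_t, /*data_offset=*/24,
                                       /*hasbit_idx=*/0>();
    // fn == &TcParser::FastTV64S1<uint64_t, 24, 0>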
// Functions referenced by generated fast tables (closed enum):
@@ -482,7 +488,10 @@ class PROTOBUF_EXPORT TcParser final {
#ifndef NDEBUG
if (PROTOBUF_PREDICT_FALSE(
reinterpret_cast<uintptr_t>(target) % alignof(T) != 0)) {
AlignFail<alignof(T)>(reinterpret_cast<uintptr_t>(target));
AlignFail(std::integral_constant<size_t, alignof(T)>(),
reinterpret_cast<uintptr_t>(target));
// Explicit abort to let compilers know this code-path does not return
abort();
}
#endif
return *target;
@@ -495,7 +504,10 @@ class PROTOBUF_EXPORT TcParser final {
#ifndef NDEBUG
if (PROTOBUF_PREDICT_FALSE(
reinterpret_cast<uintptr_t>(target) % alignof(T) != 0)) {
AlignFail<alignof(T)>(reinterpret_cast<uintptr_t>(target));
AlignFail(std::integral_constant<size_t, alignof(T)>(),
reinterpret_cast<uintptr_t>(target));
// Explicit abort to let compilers know this code-path does not return
abort();
}
#endif
return *target;
@@ -537,7 +549,7 @@ class PROTOBUF_EXPORT TcParser final {
};
static TestMiniParseResult TestMiniParse(PROTOBUF_TC_PARAM_DECL);
template <bool export_called_function>
static const char* MiniParseImpl(PROTOBUF_TC_PARAM_DECL);
static const char* MiniParse(PROTOBUF_TC_PARAM_DECL);
template <typename TagType, bool group_coding, bool aux_is_table>
static inline const char* SingularParseMessageAuxImpl(PROTOBUF_TC_PARAM_DECL);
@@ -714,12 +726,127 @@ class PROTOBUF_EXPORT TcParser final {
static const char* MpFallback(PROTOBUF_TC_PARAM_DECL);
};
// Shift "byte" left by n * 7 bits, filling vacated bits with ones.
template <int n>
inline PROTOBUF_ALWAYS_INLINE uint64_t
shift_left_fill_with_ones(uint64_t byte, uint64_t ones) {
return (byte << (n * 7)) | (ones >> (64 - (n * 7)));
}
// Shift "byte" left by n * 7 bits, filling vacated bits with ones, and
// put the new value in res. Return whether the result was negative.
template <int n>
inline PROTOBUF_ALWAYS_INLINE bool shift_left_fill_with_ones_was_negative(
uint64_t byte, uint64_t ones, int64_t& res) {
#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__)
// For the first two rounds (ptr[1] and ptr[2]), micro benchmarks show a
// substantial improvement from capturing the sign from the condition code
// register on x86-64.
bool sign_bit;
asm("shldq %3, %2, %1"
: "=@ccs"(sign_bit), "+r"(byte)
: "r"(ones), "i"(n * 7));
res = byte;
return sign_bit;
#else
// Generic fallback:
res = shift_left_fill_with_ones<n>(byte, ones);
return static_cast<int64_t>(res) < 0;
#endif
}
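As a sanity check on the inline assembly above: `shldq $imm, src, dst`
computes `dst = (dst << imm) | (src >> (64 - imm))` and sets SF from the
result's top bit, which the `"=@ccs"` flag output exposes directly as a
bool. A small standalone sketch (assumes an x86-64 GCC/Clang toolchain):

    #include <cassert>
    #include <cstdint>

    int main() {
    #if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__)
      uint64_t src = ~uint64_t{0}, dst = 0x01, shifted = dst;
      bool sign_bit;
      asm("shldq %3, %2, %1"
          : "=@ccs"(sign_bit), "+r"(shifted)
          : "r"(src), "i"(7));
      // shifted == 0xFF: (1 << 7) with seven one-bits pulled in below.
      assert(shifted == ((dst << 7) | (src >> (64 - 7))));
      assert(sign_bit == (static_cast<int64_t>(shifted) < 0));
    #endif
      return 0;
    }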
inline PROTOBUF_ALWAYS_INLINE std::pair<const char*, uint64_t>
Parse64FallbackPair(const char* p, int64_t res1) {
auto ptr = reinterpret_cast<const int8_t*>(p);
// The algorithm relies on sign extension for each byte to set all high bits
// when the varint continues. It also relies on asserting all of the lower
// bits for each successive byte read. This allows the result to be aggregated
// using a bitwise AND. For example:
//
// 8 1 64 57 ... 24 17 16 9 8 1
// ptr[0] = 1aaa aaaa ; res1 = 1111 1111 ... 1111 1111 1111 1111 1aaa aaaa
// ptr[1] = 1bbb bbbb ; res2 = 1111 1111 ... 1111 1111 11bb bbbb b111 1111
// ptr[2] = 1ccc cccc ; res3 = 0000 0000 ... 000c cccc cc11 1111 1111 1111
// ---------------------------------------------
// res1 & res2 & res3 = 0000 0000 ... 000c cccc ccbb bbbb baaa aaaa
//
// On x86-64, a shld from a single register filled with enough 1s in the high
// bits can accomplish all this in one instruction. It so happens that res1
// has 57 high bits of ones, which is enough for the largest shift done.
//
// Just as importantly, by keeping results in res1, res2, and res3, we take
// advantage of the superscalar abilities of the CPU.
GOOGLE_ABSL_DCHECK_EQ(res1 >> 7, -1);
uint64_t ones = res1; // save the high 1 bits from res1 (input to SHLD)
int64_t res2, res3; // accumulated result chunks
if (!shift_left_fill_with_ones_was_negative<1>(ptr[1], ones, res2))
goto done2;
if (!shift_left_fill_with_ones_was_negative<2>(ptr[2], ones, res3))
goto done3;
// For the remainder of the chunks, check the sign of the AND result.
res1 &= shift_left_fill_with_ones<3>(ptr[3], ones);
if (res1 >= 0) goto done4;
res2 &= shift_left_fill_with_ones<4>(ptr[4], ones);
if (res2 >= 0) goto done5;
res3 &= shift_left_fill_with_ones<5>(ptr[5], ones);
if (res3 >= 0) goto done6;
res1 &= shift_left_fill_with_ones<6>(ptr[6], ones);
if (res1 >= 0) goto done7;
res2 &= shift_left_fill_with_ones<7>(ptr[7], ones);
if (res2 >= 0) goto done8;
res3 &= shift_left_fill_with_ones<8>(ptr[8], ones);
if (res3 >= 0) goto done9;
// For valid 64-bit varints, the 10th byte/ptr[9] should be exactly 1. In this
// case, the continuation bit of ptr[8] already set the top bit of res3
// correctly, so all we have to do is check that the expected case is true.
if (PROTOBUF_PREDICT_TRUE(ptr[9] == 1)) goto done10;
// A value of 0, however, represents an over-serialized varint. This case
// should not happen, but if it does (say, due to a nonconforming serializer),
// deassert the continuation bit that came from ptr[8].
if (ptr[9] == 0) {
#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__)
// Use a small instruction since this is an uncommon code path.
asm("btcq $63,%0" : "+r"(res3));
#else
res3 ^= static_cast<uint64_t>(1) << 63;
#endif
goto done10;
}
// If the 10th byte/ptr[9] itself has any other value, then it is too big to
// fit in 64 bits. If the continuation bit is set, it is an unterminated varint.
return {nullptr, 0};
done2:
return {p + 2, res1 & res2};
done3:
return {p + 3, res1 & res2 & res3};
done4:
return {p + 4, res1 & res2 & res3};
done5:
return {p + 5, res1 & res2 & res3};
done6:
return {p + 6, res1 & res2 & res3};
done7:
return {p + 7, res1 & res2 & res3};
done8:
return {p + 8, res1 & res2 & res3};
done9:
return {p + 9, res1 & res2 & res3};
done10:
return {p + 10, res1 & res2 & res3};
}
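A worked example of the AND-aggregation (hypothetical driver; assumes the
definitions above are in scope), decoding the three-byte varint
{0xAC, 0x82, 0x01} == 16684:

    //   res1 = 1...1 1010 1100            (sign-extended first byte)
    //   res2 = 1...1 1100 0001 0111 1111  (byte 2 shifted up 7, ones below)
    //   res3 = 0...0 0111 1111 1111 1111  (byte 3 shifted up 14; sign clear)
    //   res1 & res2 & res3 = 0x412C == 16684
    const char buf[] = {'\xAC', '\x82', '\x01'};
    std::pair<const char*, uint64_t> r =
        Parse64FallbackPair(buf, static_cast<int8_t>(buf[0]));
    // r.first == buf + 3, r.second == 16684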
// Notes:
// 1) if data_offset is negative, it's read from data.offset()
// 2) if hasbit_idx is negative, it's read from data.hasbit_idx()
template <int data_offset, int hasbit_idx>
PROTOBUF_NOINLINE const char* TcParser::SpecializedFastV8S1(
PROTOBUF_TC_PARAM_DECL) {
PROTOBUF_NOINLINE const char* TcParser::FastTV8S1(PROTOBUF_TC_PARAM_DECL) {
using TagType = uint8_t;
// Special case for a varint bool field with a tag of 1 byte:
@@ -766,8 +893,40 @@ PROTOBUF_NOINLINE const char* TcParser::SpecializedFastV8S1(
}
template <typename FieldType, int data_offset, int hasbit_idx>
PROTOBUF_NOINLINE const char* TcParser::SpecializedUnrolledVImpl1(
PROTOBUF_TC_PARAM_DECL) {
PROTOBUF_NOINLINE const char* TcParser::FastTV64S1(PROTOBUF_TC_PARAM_DECL) {
using TagType = uint8_t;
// super-early success test...
if (PROTOBUF_PREDICT_TRUE(((data.data) & 0x80FF) == 0)) {
ptr += sizeof(TagType); // Consume tag
if (hasbit_idx < 32) {
hasbits |= (uint64_t{1} << hasbit_idx);
}
uint8_t value = data.data >> 8;
RefAt<FieldType>(msg, data_offset) = value;
ptr += 1;
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
}
if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
}
ptr += sizeof(TagType); // Consume tag
if (hasbit_idx < 32) {
hasbits |= (uint64_t{1} << hasbit_idx);
}
auto tmp = Parse64FallbackPair(ptr, static_cast<int8_t>(data.data >> 8));
data.data = 0; // Indicate to the compiler that we don't need this anymore.
ptr = tmp.first;
if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
return Error(PROTOBUF_TC_PARAM_PASS);
}
RefAt<FieldType>(msg, data_offset) = static_cast<FieldType>(tmp.second);
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
}
template <typename FieldType, int data_offset, int hasbit_idx>
PROTOBUF_NOINLINE const char* TcParser::FastTV32S1(PROTOBUF_TC_PARAM_DECL) {
using TagType = uint8_t;
// super-early success test...
if (PROTOBUF_PREDICT_TRUE(((data.data) & 0x80FF) == 0)) {
@@ -800,34 +959,30 @@ PROTOBUF_NOINLINE const char* TcParser::SpecializedUnrolledVImpl1(
if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
res = RotRight7AndReplaceLowByte(res, ptr[4]);
if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
res = RotRight7AndReplaceLowByte(res, ptr[5]);
if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
res = RotRight7AndReplaceLowByte(res, ptr[6]);
if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
res = RotRight7AndReplaceLowByte(res, ptr[7]);
if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
res = RotRight7AndReplaceLowByte(res, ptr[8]);
if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
if (PROTOBUF_PREDICT_FALSE(ptr[5] & 0x80)) {
if (PROTOBUF_PREDICT_FALSE(ptr[6] & 0x80)) {
if (PROTOBUF_PREDICT_FALSE(ptr[7] & 0x80)) {
if (PROTOBUF_PREDICT_FALSE(ptr[8] & 0x80)) {
if (ptr[9] & 0xFE) return Error(PROTOBUF_TC_PARAM_PASS);
res = RotateLeft(res, -7) & ~1;
res += ptr[9] & 1;
*out = RotateLeft(res, 63);
*out = RotateLeft(res, 28);
ptr += 10;
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
PROTOBUF_MUSTTAIL return ToTagDispatch(
PROTOBUF_TC_PARAM_PASS);
}
*out = RotateLeft(res, 56);
*out = RotateLeft(res, 28);
ptr += 9;
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
PROTOBUF_MUSTTAIL return ToTagDispatch(
PROTOBUF_TC_PARAM_PASS);
}
*out = RotateLeft(res, 49);
*out = RotateLeft(res, 28);
ptr += 8;
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
}
*out = RotateLeft(res, 42);
*out = RotateLeft(res, 28);
ptr += 7;
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
}
*out = RotateLeft(res, 35);
*out = RotateLeft(res, 28);
ptr += 6;
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
}

src/google/protobuf/generated_message_tctable_lite.cc
@@ -58,8 +58,18 @@ using FieldEntry = TcParseTableBase::FieldEntry;
//////////////////////////////////////////////////////////////////////////////
#ifndef NDEBUG
template void AlignFail<4>(uintptr_t);
template void AlignFail<8>(uintptr_t);
void AlignFail(std::integral_constant<size_t, 4>, std::uintptr_t address) {
GOOGLE_ABSL_LOG(FATAL) << "Unaligned (4) access at " << address;
// Explicit abort to let compilers know this function does not return
abort();
}
void AlignFail(std::integral_constant<size_t, 8>, std::uintptr_t address) {
GOOGLE_ABSL_LOG(FATAL) << "Unaligned (8) access at " << address;
// Explicit abort to let compilers know this function does not return
abort();
}
#endif
const char* TcParser::GenericFallbackLite(PROTOBUF_TC_PARAM_DECL) {
@@ -257,7 +267,7 @@ absl::string_view TcParser::FieldName(const TcParseTableBase* table,
}
template <bool export_called_function>
inline PROTOBUF_ALWAYS_INLINE const char* TcParser::MiniParseImpl(
inline PROTOBUF_ALWAYS_INLINE const char* TcParser::MiniParse(
PROTOBUF_TC_PARAM_DECL) {
TestMiniParseResult* test_out;
if (export_called_function) {
@@ -342,13 +352,13 @@ inline PROTOBUF_ALWAYS_INLINE const char* TcParser::MiniParseImpl(
}
PROTOBUF_NOINLINE const char* TcParser::MiniParse(PROTOBUF_TC_PARAM_DECL) {
PROTOBUF_MUSTTAIL return MiniParseImpl<false>(PROTOBUF_TC_PARAM_PASS);
PROTOBUF_MUSTTAIL return MiniParse<false>(PROTOBUF_TC_PARAM_PASS);
}
PROTOBUF_NOINLINE TcParser::TestMiniParseResult TcParser::TestMiniParse(
PROTOBUF_TC_PARAM_DECL) {
TestMiniParseResult result = {};
data.data = reinterpret_cast<uintptr_t>(&result);
result.ptr = MiniParseImpl<true>(PROTOBUF_TC_PARAM_PASS);
result.ptr = MiniParse<true>(PROTOBUF_TC_PARAM_PASS);
return result;
}
@@ -678,119 +688,6 @@ PROTOBUF_NOINLINE const char* TcParser::FastF64P2(PROTOBUF_TC_PARAM_DECL) {
namespace {
// Shift "byte" left by n * 7 bits, filling vacated bits with ones.
template <int n>
inline PROTOBUF_ALWAYS_INLINE uint64_t
shift_left_fill_with_ones(uint64_t byte, uint64_t ones) {
return (byte << (n * 7)) | (ones >> (64 - (n * 7)));
}
// Shift "byte" left by n * 7 bits, filling vacated bits with ones, and
// put the new value in res. Return whether the result was negative.
template <int n>
inline PROTOBUF_ALWAYS_INLINE bool shift_left_fill_with_ones_was_negative(
uint64_t byte, uint64_t ones, int64_t& res) {
#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__)
// For the first two rounds (ptr[1] and ptr[2]), micro benchmarks show a
// substantial improvement from capturing the sign from the condition code
// register on x86-64.
bool sign_bit;
asm("shldq %3, %2, %1"
: "=@ccs"(sign_bit), "+r"(byte)
: "r"(ones), "i"(n * 7));
res = byte;
return sign_bit;
#else
// Generic fallback:
res = (byte << (n * 7)) | (ones >> (64 - (n * 7)));
return static_cast<int64_t>(res) < 0;
#endif
}
inline PROTOBUF_ALWAYS_INLINE std::pair<const char*, uint64_t>
Parse64FallbackPair(const char* p, int64_t res1) {
auto ptr = reinterpret_cast<const int8_t*>(p);
// The algorithm relies on sign extension for each byte to set all high bits
// when the varint continues. It also relies on asserting all of the lower
// bits for each successive byte read. This allows the result to be aggregated
// using a bitwise AND. For example:
//
// 8 1 64 57 ... 24 17 16 9 8 1
// ptr[0] = 1aaa aaaa ; res1 = 1111 1111 ... 1111 1111 1111 1111 1aaa aaaa
// ptr[1] = 1bbb bbbb ; res2 = 1111 1111 ... 1111 1111 11bb bbbb b111 1111
// ptr[2] = 1ccc cccc ; res3 = 0000 0000 ... 000c cccc cc11 1111 1111 1111
// ---------------------------------------------
// res1 & res2 & res3 = 0000 0000 ... 000c cccc ccbb bbbb baaa aaaa
//
// On x86-64, a shld from a single register filled with enough 1s in the high
// bits can accomplish all this in one instruction. It so happens that res1
// has 57 high bits of ones, which is enough for the largest shift done.
GOOGLE_ABSL_DCHECK_EQ(res1 >> 7, -1);
uint64_t ones = res1; // save the high 1 bits from res1 (input to SHLD)
int64_t res2, res3; // accumulated result chunks
if (!shift_left_fill_with_ones_was_negative<1>(ptr[1], ones, res2))
goto done2;
if (!shift_left_fill_with_ones_was_negative<2>(ptr[2], ones, res3))
goto done3;
// For the remainder of the chunks, check the sign of the AND result.
res1 &= shift_left_fill_with_ones<3>(ptr[3], ones);
if (res1 >= 0) goto done4;
res2 &= shift_left_fill_with_ones<4>(ptr[4], ones);
if (res2 >= 0) goto done5;
res3 &= shift_left_fill_with_ones<5>(ptr[5], ones);
if (res3 >= 0) goto done6;
res1 &= shift_left_fill_with_ones<6>(ptr[6], ones);
if (res1 >= 0) goto done7;
res2 &= shift_left_fill_with_ones<7>(ptr[7], ones);
if (res2 >= 0) goto done8;
res3 &= shift_left_fill_with_ones<8>(ptr[8], ones);
if (res3 >= 0) goto done9;
// For valid 64-bit varints, the 10th byte/ptr[9] should be exactly 1. In this
// case, the continuation bit of ptr[8] already set the top bit of res3
// correctly, so all we have to do is check that the expected case is true.
if (PROTOBUF_PREDICT_TRUE(ptr[9] == 1)) goto done10;
// A value of 0, however, represents an over-serialized varint. This case
// should not happen, but if it does (say, due to a nonconforming serializer),
// deassert the continuation bit that came from ptr[8].
if (ptr[9] == 0) {
#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__)
// Use a small instruction since this is an uncommon code path.
asm("btcq $63,%0" : "+r"(res3));
#else
res3 ^= static_cast<uint64_t>(1) << 63;
#endif
goto done10;
}
// If the 10th byte/ptr[9] itself has any other value, then it is too big to
// fit in 64 bits. If the continuation bit is set, it is an unterminated varint.
return {nullptr, 0};
done2:
return {p + 2, res1 & res2};
done3:
return {p + 3, res1 & res2 & res3};
done4:
return {p + 4, res1 & res2 & res3};
done5:
return {p + 5, res1 & res2 & res3};
done6:
return {p + 6, res1 & res2 & res3};
done7:
return {p + 7, res1 & res2 & res3};
done8:
return {p + 8, res1 & res2 & res3};
done9:
return {p + 9, res1 & res2 & res3};
done10:
return {p + 10, res1 & res2 & res3};
}
template <typename Type>
inline PROTOBUF_ALWAYS_INLINE const char* ParseVarint(const char* p,
Type* value) {
@@ -969,7 +866,7 @@ PROTOBUF_NOINLINE const char* TcParser::SingularVarBigint(
}
PROTOBUF_NOINLINE const char* TcParser::FastV8S1(PROTOBUF_TC_PARAM_DECL) {
PROTOBUF_MUSTTAIL return SpecializedFastV8S1<-1, -1>(PROTOBUF_TC_PARAM_PASS);
PROTOBUF_MUSTTAIL return FastTV8S1<-1, -1>(PROTOBUF_TC_PARAM_PASS);
}
PROTOBUF_NOINLINE const char* TcParser::FastV8S2(PROTOBUF_TC_PARAM_DECL) {
PROTOBUF_MUSTTAIL return SingularVarint<bool, uint16_t>(

src/google/protobuf/generated_message_tctable_lite_test.cc
@@ -33,6 +33,7 @@
#include "google/protobuf/generated_message_tctable_impl.h"
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "absl/types/optional.h"
#include "google/protobuf/wire_format_lite.h"
namespace google {
@@ -43,6 +44,210 @@ namespace {
using ::testing::Eq;
using ::testing::Not;
using ::testing::Optional;
// The fast parser's dispatch table Xors two bytes of incoming data with
// the data in TcFieldData, so we reproduce that here:
TcFieldData Xor2SerializedBytes(TcFieldData tfd, const char* ptr) {
uint64_t twobytes = 0xFF & ptr[0];
twobytes |= (0xFF & ptr[1]) << 8;
tfd.data ^= twobytes;
return tfd;
}
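A hypothetical illustration of why the XOR is needed (made-up values): the
dispatcher hands each Fast* thunk its table bits XOR-ed with the first two
wire bytes, so a matching one-byte tag leaves a zero low byte — exactly what
`data.coded_tag<uint8_t>() != 0` checks — while the following varint byte
lands in byte 1, where the fast parsers read it as `data.data >> 8`:

    TcFieldData d{};
    d.data = 0x08;                     // expected coded tag: field 1, varint
    const char wire[] = {0x08, 0x2A};  // matching tag, then payload byte 42
    d = Xor2SerializedBytes(d, wire);  // d.data == 0x2A00: tag byte zeroed,
                                       // payload available at (d.data >> 8)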
absl::optional<const char*> fallback_ptr_received;
absl::optional<uint64_t> fallback_hasbits_received;
absl::optional<uint64_t> fallback_tag_received;
const char* FastParserGaveUp(::google::protobuf::MessageLite*, const char* ptr,
::google::protobuf::internal::ParseContext*,
::google::protobuf::internal::TcFieldData data,
const ::google::protobuf::internal::TcParseTableBase*,
uint64_t hasbits) {
fallback_ptr_received = ptr;
fallback_hasbits_received = hasbits;
fallback_tag_received = data.tag();
return nullptr;
}
// To test that we aren't storing too much data, we set up a fake message area
// and fill all its bytes with kDND.
constexpr char kDND = 0x5A; // "Do Not Disturb"
// To retrieve data and see if it matches what we expect, we have this routine
// which simultaneously reads the data we want, and sets it back to what it was
// before the test, that is, to kDND. This makes it easier to test at the end
// that all the original data is undisturbed.
template <typename T>
T ReadAndReset(char* p) {
T result;
memcpy(&result, p, sizeof(result));
memset(p, kDND, sizeof(result));
return result;
}
TEST(FastVarints, NameHere) {
constexpr uint8_t kHasBitsOffset = 4;
constexpr uint8_t kHasBitIndex = 0;
constexpr uint8_t kFieldOffset = 24;
// clang-format off
const TcParseTable<0, 1, 0, 0, 2> parse_table = {
{
kHasBitsOffset, //
0, 0, 0, // no _extensions_
1, 0, // max_field_number, fast_idx_mask
offsetof(decltype(parse_table), field_lookup_table),
0xFFFFFFFF - 1, // skipmap
offsetof(decltype(parse_table), field_entries),
1, // num_field_entries
0, // num_aux_entries
offsetof(decltype(parse_table), field_names), // no aux_entries
nullptr, // default instance
FastParserGaveUp, // fallback
},
// Fast Table:
{{
// optional int32 field = 1;
{TcParser::SingularVarintNoZag1<::uint32_t, kFieldOffset,
kHasBitIndex>(),
{/* coded_tag= */ 8, kHasBitIndex, /* aux_idx= */ 0, kFieldOffset}},
}},
// Field Lookup Table:
{{65535, 65535}},
// Field Entries:
{{
// This is set to kFkNone to force MiniParse to call the fallback
{kFieldOffset, kHasBitsOffset + 0, 0, (field_layout::kFkNone)},
}},
// no aux_entries
{{}},
};
// clang-format on
uint8_t serialize_buffer[64];
for (int size : {8, 32, 64, -8, -32, -64}) {
auto next_i = [](uint64_t i) {
// if i + 1 is a power of two, return that.
// (This will also match when i == -1, but for this loop we know that will
// not happen.)
if ((i & (i + 1)) == 0) return i + 1;
// otherwise, i is already a power of two, so advance to one less than the
// next power of two.
return i + (i - 1);
};
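// The sequence visits every power of two and every (2^k - 1) value:
// 0, 1, 2, 3, 4, 7, 8, 15, 16, 31, ..., 2^63 - 1, 2^63; the loop's
// `i + 1 != 0` condition stops before the final all-ones value.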
for (uint64_t i = 0; i + 1 != 0; i = next_i(i)) {
char fake_msg[64] = {
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
};
memset(&fake_msg[kHasBitsOffset], 0, sizeof(uint32_t));
auto serialize_ptr = WireFormatLite::WriteUInt64ToArray(
/* field_number= */ 1, i, serialize_buffer);
absl::string_view serialized{
reinterpret_cast<char*>(&serialize_buffer[0]),
static_cast<size_t>(serialize_ptr - serialize_buffer)};
const char* ptr = nullptr;
const char* end_ptr = nullptr;
ParseContext ctx(io::CodedInputStream::GetDefaultRecursionLimit(),
/* aliasing= */ false, &ptr, serialized);
#if 0 // FOR_DEBUGGING
GOOGLE_ABSL_LOG(ERROR) << "size=" << size << " i=" << i << " ptr points to " //
<< +ptr[0] << "," << +ptr[1] << "," //
<< +ptr[2] << "," << +ptr[3] << "," //
<< +ptr[4] << "," << +ptr[5] << "," //
<< +ptr[6] << "," << +ptr[7] << "," //
<< +ptr[8] << "," << +ptr[9] << "," << +ptr[10] << "\n";
#endif
TailCallParseFunc fn = nullptr;
switch (size) {
case 8:
fn = &TcParser::FastV8S1;
break;
case -8:
fn = &TcParser::FastTV8S1<kFieldOffset, kHasBitIndex>;
break;
case 32:
fn = &TcParser::FastV32S1;
break;
case -32:
fn = &TcParser::FastTV32S1<uint32_t, kFieldOffset, kHasBitIndex>;
break;
case 64:
fn = &TcParser::FastV64S1;
break;
case -64:
fn = &TcParser::FastTV64S1<uint64_t, kFieldOffset, kHasBitIndex>;
break;
}
fallback_ptr_received = absl::nullopt;
fallback_hasbits_received = absl::nullopt;
fallback_tag_received = absl::nullopt;
end_ptr = fn(reinterpret_cast<MessageLite*>(fake_msg), ptr, &ctx,
Xor2SerializedBytes(parse_table.fast_entries[0].bits, ptr),
&parse_table.header, /*hasbits=*/0);
switch (size) {
case -8:
case 8: {
if (end_ptr == nullptr) {
// If end_ptr is nullptr, that means the FastParser gave up and
// tried to pass control to MiniParse.... which is expected anytime
// we encounter something other than 0 or 1 encodings. (Since
// FastV8S1 is only used for `bool` fields.)
EXPECT_NE(i, true);
EXPECT_NE(i, false);
EXPECT_THAT(fallback_hasbits_received, Optional(0));
// Like the mini-parser functions, and unlike the fast-parser
// functions, the fallback receives a ptr already incremented past
// the tag, and receives the actual tag in the `data` parameter.
EXPECT_THAT(fallback_ptr_received, Optional(ptr + 1));
EXPECT_THAT(fallback_tag_received, Optional(0x7F & *ptr));
continue;
}
ASSERT_EQ(end_ptr - ptr, serialized.size());
auto actual_field = ReadAndReset<uint8_t>(&fake_msg[kFieldOffset]);
EXPECT_EQ(actual_field, static_cast<decltype(actual_field)>(i)) //
<< " hex: " << absl::StrCat(absl::Hex(actual_field));
}; break;
case -32:
case 32: {
ASSERT_EQ(end_ptr - ptr, serialized.size());
auto actual_field = ReadAndReset<uint32_t>(&fake_msg[kFieldOffset]);
EXPECT_EQ(actual_field, static_cast<decltype(actual_field)>(i)) //
<< " hex: " << absl::StrCat(absl::Hex(actual_field));
}; break;
case -64:
case 64: {
ASSERT_EQ(end_ptr - ptr, serialized.size());
auto actual_field = ReadAndReset<uint64_t>(&fake_msg[kFieldOffset]);
EXPECT_EQ(actual_field, static_cast<decltype(actual_field)>(i)) //
<< " hex: " << absl::StrCat(absl::Hex(actual_field));
}; break;
}
EXPECT_TRUE(!fallback_ptr_received);
EXPECT_TRUE(!fallback_hasbits_received);
EXPECT_TRUE(!fallback_tag_received);
auto hasbits = ReadAndReset<uint32_t>(&fake_msg[kHasBitsOffset]);
EXPECT_EQ(hasbits, 1 << kHasBitIndex);
int offset = 0;
for (char ch : fake_msg) {
EXPECT_EQ(ch, kDND) << " corruption of message at offset " << offset;
++offset;
}
}
}
}
MATCHER_P3(IsEntryForFieldNum, table, field_num, field_numbers_table,
absl::StrCat(negation ? "isn't " : "",
