From 853a55ebbbbc4424fb1d56a31fe07e1ec961c9cd Mon Sep 17 00:00:00 2001 From: Martijn Vels Date: Mon, 13 Feb 2023 13:30:20 -0800 Subject: [PATCH] Cleanup FastTV32S1() and FastTV64S1() functions. - replaces the existing FastV32S1() and FastV64S1() implementations with the FastTV32S1() and FastTV64S1() functions, removing the latter names, and moves the internal template helpers from the header into the .cc file. - defines PROTOBUF_TC_PARAM_NO_DATA_DECL and PROTOBUF_TC_PARAM_NO_DATA_PASS macros to declare functions not using `data`, and to use in call sites calling these functions. END_PUBLIC - adds MUSTTAIL to ToParseLoop() and Error(). The current code was inconsistent on this, and if it doesn't help, it doesn't hurt either and minimizes any possible pollution from the call sites. PiperOrigin-RevId: 509317845 --- .../protobuf/generated_message_tctable_decl.h | 23 +- .../protobuf/generated_message_tctable_impl.h | 233 +----------- .../generated_message_tctable_lite.cc | 349 +++++++++++++----- .../generated_message_tctable_lite_test.cc | 15 +- src/google/protobuf/port_def.inc | 13 + 5 files changed, 318 insertions(+), 315 deletions(-) diff --git a/src/google/protobuf/generated_message_tctable_decl.h b/src/google/protobuf/generated_message_tctable_decl.h index dd4845ba88..4ec6973d23 100644 --- a/src/google/protobuf/generated_message_tctable_decl.h +++ b/src/google/protobuf/generated_message_tctable_decl.h @@ -54,6 +54,7 @@ namespace internal { // Additional information about this field: struct TcFieldData { constexpr TcFieldData() : data(0) {} + explicit constexpr TcFieldData(uint64_t data) : data(data) {} // Fast table entry constructor: constexpr TcFieldData(uint16_t coded_tag, uint8_t hasbit_idx, uint8_t aux_idx, @@ -63,6 +64,24 @@ struct TcFieldData { uint64_t{hasbit_idx} << 16 | // uint64_t{coded_tag}) {} + // Constructor to create an explicit 'uninitialized' instance. 
+ // This constructor can be used to pass an uninitialized `data` value to a + // table driven parser function that does not use `data`. The purpose of this + // is that it allows the compiler to reallocate and re-purpose the register + // that is currently holding its value for other data. This reduces register + // allocations inside the highly optimized varint parsing functions. + // + // Applications not using `data` use the `PROTOBUF_TC_PARAM_NO_DATA_DECL` + // macro to declare the standard input arguments with no name for the `data` + // argument. Callers then use the `PROTOBUF_TC_PARAM_NO_DATA_PASS` macro. + // + // Example: + // if (ptr == nullptr) { + // PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); + // } + struct DefaultInit {}; + TcFieldData(DefaultInit) {} // NOLINT(google-explicit-constructor) + // Fields used in fast table parsing: // // Bit: @@ -122,7 +141,9 @@ struct TcFieldData { uint32_t tag() const { return static_cast(data); } uint32_t entry_offset() const { return static_cast(data >> 32); } - uint64_t data; + union { + uint64_t data; + }; }; struct TcParseTableBase; diff --git a/src/google/protobuf/generated_message_tctable_impl.h b/src/google/protobuf/generated_message_tctable_impl.h index 4f2cc2a516..84ee0e1284 100644 --- a/src/google/protobuf/generated_message_tctable_impl.h +++ b/src/google/protobuf/generated_message_tctable_impl.h @@ -292,7 +292,7 @@ class PROTOBUF_EXPORT TcParser final { // the function is used as a way to get a UnknownFieldOps vtable, returned // via the `const char*` return type. See `GetUnknownFieldOps()` - static bool MustFallbackToGeneric(PROTOBUF_TC_PARAM_DECL) { + static bool MustFallbackToGeneric(PROTOBUF_TC_PARAM_NO_DATA_DECL) { return ptr == nullptr; } @@ -359,22 +359,16 @@ class PROTOBUF_EXPORT TcParser final { static const char* FastZ64P1(PROTOBUF_TC_PARAM_DECL); static const char* FastZ64P2(PROTOBUF_TC_PARAM_DECL); - // Manually unrolled and specialized Varint parsing. 
- template - static const char* FastTV32S1(PROTOBUF_TC_PARAM_DECL); - template - static const char* FastTV64S1(PROTOBUF_TC_PARAM_DECL); - template static constexpr TailCallParseFunc SingularVarintNoZag1() { if (sizeof(FieldType) == 1) { return &FastV8S1; } if (sizeof(FieldType) == 4) { - return &FastTV32S1; + return &FastV32S1; } if (sizeof(FieldType) == 8) { - return &FastTV64S1; + return &FastV64S1; } static_assert(sizeof(FieldType) == 1 || sizeof(FieldType) == 4 || sizeof(FieldType) == 8, @@ -515,12 +509,16 @@ class PROTOBUF_EXPORT TcParser final { // NOTE: Currently, this function only calls the table-level fallback // function, so it should only be called as the fallback from fast table // parsing. - static const char* MiniParse(PROTOBUF_TC_PARAM_DECL); + static const char* MiniParse(PROTOBUF_TC_PARAM_NO_DATA_DECL); static const char* FastEndG1(PROTOBUF_TC_PARAM_DECL); static const char* FastEndG2(PROTOBUF_TC_PARAM_DECL); private: + // Optimized small tag varint parser for int32/int64 + template + static const char* FastVarintS1(PROTOBUF_TC_PARAM_DECL); + friend class GeneratedTcTableLiteTest; static void* MaybeGetSplitBase(MessageLite* msg, const bool is_split, const TcParseTableBase* table); @@ -555,10 +553,10 @@ class PROTOBUF_EXPORT TcParser final { } } - static const char* TagDispatch(PROTOBUF_TC_PARAM_DECL); - static const char* ToTagDispatch(PROTOBUF_TC_PARAM_DECL); - static const char* ToParseLoop(PROTOBUF_TC_PARAM_DECL); - static const char* Error(PROTOBUF_TC_PARAM_DECL); + static const char* TagDispatch(PROTOBUF_TC_PARAM_NO_DATA_DECL); + static const char* ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_DECL); + static const char* ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_DECL); + static const char* Error(PROTOBUF_TC_PARAM_NO_DATA_DECL); static const char* FastUnknownEnumFallback(PROTOBUF_TC_PARAM_DECL); @@ -712,205 +710,14 @@ class PROTOBUF_EXPORT TcParser final { static const char* MpFallback(PROTOBUF_TC_PARAM_DECL); }; -// Shift "byte" left by n * 7 bits, 
filling vacated bits with ones. -template -inline PROTOBUF_ALWAYS_INLINE int64_t shift_left_fill_with_ones(uint64_t byte, - uint64_t ones) { - return static_cast((byte << (n * 7)) | (ones >> (64 - (n * 7)))); -} - -// Shift "byte" left by n * 7 bits, filling vacated bits with ones, and -// put the new value in res. Return whether the result was negative. -template -inline PROTOBUF_ALWAYS_INLINE bool shift_left_fill_with_ones_was_negative( - uint64_t byte, uint64_t ones, int64_t& res) { -#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__) - // For the first two rounds (ptr[1] and ptr[2]), micro benchmarks show a - // substantial improvement from capturing the sign from the condition code - // register on x86-64. - bool sign_bit; - asm("shldq %3, %2, %1" - : "=@ccs"(sign_bit), "+r"(byte) - : "r"(ones), "i"(n * 7)); - res = static_cast(byte); - return sign_bit; -#else - // Generic fallback: - res = shift_left_fill_with_ones(byte, ones); - return res < 0; -#endif -} - -template -inline PROTOBUF_ALWAYS_INLINE std::pair -ParseFallbackPair(const char* p, int64_t res1) { - constexpr bool kIs64BitVarint = std::is_same::value; - constexpr bool kIs32BitVarint = std::is_same::value; - static_assert(kIs64BitVarint || kIs32BitVarint, - "Only 32 or 64 bit varints are supported"); - auto ptr = reinterpret_cast(p); - - // The algorithm relies on sign extension for each byte to set all high bits - // when the varint continues. It also relies on asserting all of the lower - // bits for each successive byte read. This allows the result to be aggregated - // using a bitwise AND. For example: - // - // 8 1 64 57 ... 24 17 16 9 8 1 - // ptr[0] = 1aaa aaaa ; res1 = 1111 1111 ... 1111 1111 1111 1111 1aaa aaaa - // ptr[1] = 1bbb bbbb ; res2 = 1111 1111 ... 1111 1111 11bb bbbb b111 1111 - // ptr[2] = 1ccc cccc ; res3 = 0000 0000 ... 000c cccc cc11 1111 1111 1111 - // --------------------------------------------- - // res1 & res2 & res3 = 0000 0000 ... 
000c cccc ccbb bbbb baaa aaaa - // - // On x86-64, a shld from a single register filled with enough 1s in the high - // bits can accomplish all this in one instruction. It so happens that res1 - // has 57 high bits of ones, which is enough for the largest shift done. - // - // Just as importantly, by keeping results in res1, res2, and res3, we take - // advantage of the superscalar abilities of the CPU. - ABSL_DCHECK_EQ(res1 >> 7, -1); - uint64_t ones = res1; // save the high 1 bits from res1 (input to SHLD) - int64_t res2, res3; // accumulated result chunks - - if (!shift_left_fill_with_ones_was_negative<1>(ptr[1], ones, res2)) - goto done2; - if (!shift_left_fill_with_ones_was_negative<2>(ptr[2], ones, res3)) - goto done3; - - // For the remainder of the chunks, check the sign of the AND result. - res2 &= shift_left_fill_with_ones<3>(ptr[3], ones); - if (res2 >= 0) goto done4; - res1 &= shift_left_fill_with_ones<4>(ptr[4], ones); - if (res1 >= 0) goto done5; - if (kIs64BitVarint) { - res2 &= shift_left_fill_with_ones<5>(ptr[5], ones); - if (res2 >= 0) goto done6; - res3 &= shift_left_fill_with_ones<6>(ptr[6], ones); - if (res3 >= 0) goto done7; - res1 &= shift_left_fill_with_ones<7>(ptr[7], ones); - if (res1 >= 0) goto done8; - res3 &= shift_left_fill_with_ones<8>(ptr[8], ones); - if (res3 >= 0) goto done9; - } else if (kIs32BitVarint) { - if (PROTOBUF_PREDICT_TRUE(!(ptr[5] & 0x80))) goto done6; - if (PROTOBUF_PREDICT_TRUE(!(ptr[6] & 0x80))) goto done7; - if (PROTOBUF_PREDICT_TRUE(!(ptr[7] & 0x80))) goto done8; - if (PROTOBUF_PREDICT_TRUE(!(ptr[8] & 0x80))) goto done9; - } - - // For valid 64bit varints, the 10th byte/ptr[9] should be exactly 1. In this - // case, the continuation bit of ptr[8] already set the top bit of res3 - // correctly, so all we have to do is check that the expected case is true. 
- if (PROTOBUF_PREDICT_TRUE(kIs64BitVarint && ptr[9] == 1)) goto done10; - - if (PROTOBUF_PREDICT_FALSE(ptr[9] & 0x80)) { - // If the continue bit is set, it is an unterminated varint. - return {nullptr, 0}; - } - - // A zero value of the first bit of the 10th byte represents an - // over-serialized varint. This case should not happen, but if does (say, due - // to a nonconforming serializer), deassert the continuation bit that came - // from ptr[8]. - if (kIs64BitVarint && (ptr[9] & 1) == 0) { -#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__) - // Use a small instruction since this is an uncommon code path. - asm("btcq $63,%0" : "+r"(res3)); -#else - res3 ^= static_cast(1) << 63; -#endif - } - goto done10; - -done2: - return {p + 2, res1 & res2}; -done3: - return {p + 3, res1 & res2 & res3}; -done4: - return {p + 4, res1 & res2 & res3}; -done5: - return {p + 5, res1 & res2 & res3}; -done6: - return {p + 6, res1 & res2 & res3}; -done7: - return {p + 7, res1 & res2 & res3}; -done8: - return {p + 8, res1 & res2 & res3}; -done9: - return {p + 9, res1 & res2 & res3}; -done10: - return {p + 10, res1 & res2 & res3}; -} - -template -PROTOBUF_NOINLINE const char* TcParser::FastTV64S1(PROTOBUF_TC_PARAM_DECL) { - using TagType = uint8_t; - // super-early success test... 
- if (PROTOBUF_PREDICT_TRUE(((data.data) & 0x80FF) == 0)) { - ptr += sizeof(TagType); // Consume tag - hasbits |= (uint64_t{1} << data.hasbit_idx()); - uint8_t value = data.data >> 8; - RefAt(msg, data.offset()) = value; - ptr += 1; - PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS); - } - if (PROTOBUF_PREDICT_FALSE(data.coded_tag() != 0)) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); - } - ptr += sizeof(TagType); // Consume tag - hasbits |= (uint64_t{1} << data.hasbit_idx()); - - auto tmp = - ParseFallbackPair(ptr, static_cast(data.data >> 8)); - ptr = tmp.first; - if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) { - data.data = 0; // Indicate to the compiler that we don't need this anymore. - return Error(PROTOBUF_TC_PARAM_PASS); - } - - RefAt(msg, data.offset()) = static_cast(tmp.second); - data.data = 0; // Indicate to the compiler that we don't need this anymore. - PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS); -} - -template -PROTOBUF_NOINLINE const char* TcParser::FastTV32S1(PROTOBUF_TC_PARAM_DECL) { - using TagType = uint8_t; - // super-early success test... - if (PROTOBUF_PREDICT_TRUE(((data.data) & 0x80FF) == 0)) { - ptr += sizeof(TagType); // Consume tag - hasbits |= (uint64_t{1} << data.hasbit_idx()); - uint8_t value = data.data >> 8; - RefAt(msg, data.offset()) = value; - ptr += 1; - PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS); - } - if (PROTOBUF_PREDICT_FALSE(data.coded_tag() != 0)) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); - } - ptr += sizeof(TagType); // Consume tag - hasbits |= (uint64_t{1} << data.hasbit_idx()); - - auto tmp = - ParseFallbackPair(ptr, static_cast(data.data >> 8)); - ptr = tmp.first; - if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) { - return Error(PROTOBUF_TC_PARAM_PASS); - } - - RefAt(msg, data.offset()) = static_cast(tmp.second); - data.data = 0; // Indicate to the compiler that we don't need this anymore. 
- PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS); -} - // Dispatch to the designated parse function inline PROTOBUF_ALWAYS_INLINE const char* TcParser::TagDispatch( - PROTOBUF_TC_PARAM_DECL) { + PROTOBUF_TC_PARAM_NO_DATA_DECL) { const auto coded_tag = UnalignedLoad(ptr); const size_t idx = coded_tag & table->fast_idx_mask; PROTOBUF_ASSUME((idx & 7) == 0); auto* fast_entry = table->fast_entry(idx >> 3); - data = fast_entry->bits; + TcFieldData data = fast_entry->bits; data.data ^= coded_tag; PROTOBUF_MUSTTAIL return fast_entry->target()(PROTOBUF_TC_PARAM_PASS); } @@ -921,25 +728,23 @@ inline PROTOBUF_ALWAYS_INLINE const char* TcParser::TagDispatch( // mode. Luckily the structure of the algorithm is such that it's always // possible to just return and use the enclosing parse loop as a trampoline. inline PROTOBUF_ALWAYS_INLINE const char* TcParser::ToTagDispatch( - PROTOBUF_TC_PARAM_DECL) { + PROTOBUF_TC_PARAM_NO_DATA_DECL) { constexpr bool always_return = !PROTOBUF_TAILCALL; if (always_return || !ctx->DataAvailable(ptr)) { - PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS); } - PROTOBUF_MUSTTAIL return TagDispatch(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return TagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS); } inline PROTOBUF_ALWAYS_INLINE const char* TcParser::ToParseLoop( - PROTOBUF_TC_PARAM_DECL) { - (void)data; + PROTOBUF_TC_PARAM_NO_DATA_DECL) { (void)ctx; SyncHasbits(msg, hasbits, table); return ptr; } inline PROTOBUF_ALWAYS_INLINE const char* TcParser::Error( - PROTOBUF_TC_PARAM_DECL) { - (void)data; + PROTOBUF_TC_PARAM_NO_DATA_DECL) { (void)ctx; (void)ptr; SyncHasbits(msg, hasbits, table); diff --git a/src/google/protobuf/generated_message_tctable_lite.cc b/src/google/protobuf/generated_message_tctable_lite.cc index acc1915efa..1455a9e286 100644 --- a/src/google/protobuf/generated_message_tctable_lite.cc +++ 
b/src/google/protobuf/generated_message_tctable_lite.cc @@ -98,7 +98,7 @@ PROTOBUF_NOINLINE const char* TcParser::ParseLoop( // TODO(b/64614992): remove this asm asm("" : "+r"(table)); #endif - ptr = TagDispatch(msg, ptr, ctx, {}, table - 1, 0); + ptr = TagDispatch(msg, ptr, ctx, TcFieldData::DefaultInit(), table - 1, 0); if (ptr == nullptr) break; if (ctx->LastTag() != 1) break; // Ended on terminating tag } @@ -279,7 +279,7 @@ inline PROTOBUF_ALWAYS_INLINE const char* TcParser::MiniParse( ptr = ReadTagInlined(ptr, &tag); if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) { if (export_called_function) *test_out = {Error}; - return Error(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); } auto* entry = FindFieldEntry(table, tag >> 3); @@ -351,8 +351,9 @@ inline PROTOBUF_ALWAYS_INLINE const char* TcParser::MiniParse( PROTOBUF_MUSTTAIL return parse_fn(PROTOBUF_TC_PARAM_PASS); } -PROTOBUF_NOINLINE const char* TcParser::MiniParse(PROTOBUF_TC_PARAM_DECL) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); +PROTOBUF_NOINLINE const char* TcParser::MiniParse( + PROTOBUF_TC_PARAM_NO_DATA_DECL) { + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } PROTOBUF_NOINLINE TcParser::TestMiniParseResult TcParser::TestMiniParse( PROTOBUF_TC_PARAM_DECL) { @@ -369,11 +370,11 @@ const char* TcParser::MpFallback(PROTOBUF_TC_PARAM_DECL) { template const char* TcParser::FastEndGroupImpl(PROTOBUF_TC_PARAM_DECL) { if (PROTOBUF_PREDICT_FALSE(data.coded_tag() != 0)) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } ctx->SetLastTag(data.decoded_tag()); ptr += sizeof(TagType); - PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS); } PROTOBUF_NOINLINE const char* TcParser::FastEndG1(PROTOBUF_TC_PARAM_DECL) { @@ -403,7 +404,7 @@ template inline PROTOBUF_ALWAYS_INLINE 
const char* TcParser::SingularParseMessageAuxImpl( PROTOBUF_TC_PARAM_DECL) { if (PROTOBUF_PREDICT_FALSE(data.coded_tag() != 0)) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } auto saved_tag = UnalignedLoad(ptr); ptr += sizeof(TagType); @@ -478,7 +479,7 @@ template inline PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedParseMessageAuxImpl( PROTOBUF_TC_PARAM_DECL) { if (PROTOBUF_PREDICT_FALSE(data.coded_tag() != 0)) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } const auto expected_tag = UnalignedLoad(ptr); const auto aux = *table->field_aux(data.aux_idx()); @@ -502,14 +503,14 @@ inline PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedParseMessageAuxImpl( } } if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) { - PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); } if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) { - PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS); } } while (UnalignedLoad(ptr) == expected_tag); - PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS); } PROTOBUF_NOINLINE const char* TcParser::FastMdR1(PROTOBUF_TC_PARAM_DECL) { @@ -560,13 +561,13 @@ template PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularFixed( PROTOBUF_TC_PARAM_DECL) { if (PROTOBUF_PREDICT_FALSE(data.coded_tag() != 0)) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } ptr += sizeof(TagType); // Consume tag hasbits |= (uint64_t{1} << data.hasbit_idx()); RefAt(msg, data.offset()) = UnalignedLoad(ptr); ptr += sizeof(LayoutType); - PROTOBUF_MUSTTAIL return 
ToTagDispatch(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS); } PROTOBUF_NOINLINE const char* TcParser::FastF32S1(PROTOBUF_TC_PARAM_DECL) { @@ -598,7 +599,7 @@ PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedFixed( if (data.coded_tag() == 0) { return PackedFixed(PROTOBUF_TC_PARAM_PASS); } else { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } } auto& field = RefAt>(msg, data.offset()); @@ -607,7 +608,7 @@ PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedFixed( field.Add(UnalignedLoad(ptr + sizeof(TagType))); ptr += sizeof(TagType) + sizeof(LayoutType); } while (ctx->DataAvailable(ptr) && UnalignedLoad(ptr) == tag); - return ToParseLoop(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS); } PROTOBUF_NOINLINE const char* TcParser::FastF32R1(PROTOBUF_TC_PARAM_DECL) { @@ -643,7 +644,7 @@ const char* TcParser::PackedFixed(PROTOBUF_TC_PARAM_DECL) { if (data.coded_tag() == 0) { return RepeatedFixed(PROTOBUF_TC_PARAM_PASS); } else { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } } ptr += sizeof(TagType); @@ -680,6 +681,136 @@ PROTOBUF_NOINLINE const char* TcParser::FastF64P2(PROTOBUF_TC_PARAM_DECL) { namespace { +// Shift "byte" left by n * 7 bits, filling vacated bits with ones. +template +inline PROTOBUF_ALWAYS_INLINE int64_t shift_left_fill_with_ones(uint64_t byte, + uint64_t ones) { + return static_cast((byte << (n * 7)) | (ones >> (64 - (n * 7)))); +} + +// Shift "byte" left by n * 7 bits, filling vacated bits with ones, and +// put the new value in res. Return whether the result was negative. 
+template +inline PROTOBUF_ALWAYS_INLINE bool shift_left_fill_with_ones_was_negative( + uint64_t byte, uint64_t ones, int64_t& res) { +#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__) + // For the first two rounds (up to 2 varint bytes), micro benchmarks show a + // substantial improvement from capturing the sign from the condition code + // register on x86-64. + bool sign_bit; + asm("shldq %3, %2, %1" + : "=@ccs"(sign_bit), "+r"(byte) + : "r"(ones), "i"(n * 7)); + res = static_cast(byte); + return sign_bit; +#else + // Generic fallback: + res = shift_left_fill_with_ones(byte, ones); + return res < 0; +#endif +} + +template +inline PROTOBUF_ALWAYS_INLINE std::pair +ParseFallbackPair(const char* p, int64_t res1) { + constexpr bool kIs64BitVarint = std::is_same::value; + constexpr bool kIs32BitVarint = std::is_same::value; + static_assert(kIs64BitVarint || kIs32BitVarint, + "Only 32 or 64 bit varints are supported"); + auto ptr = reinterpret_cast(p); + + // The algorithm relies on sign extension for each byte to set all high bits + // when the varint continues. It also relies on asserting all of the lower + // bits for each successive byte read. This allows the result to be aggregated + // using a bitwise AND. For example: + // + // 8 1 64 57 ... 24 17 16 9 8 1 + // ptr[0] = 1aaa aaaa ; res1 = 1111 1111 ... 1111 1111 1111 1111 1aaa aaaa + // ptr[1] = 1bbb bbbb ; res2 = 1111 1111 ... 1111 1111 11bb bbbb b111 1111 + // ptr[2] = 0ccc cccc ; res3 = 0000 0000 ... 000c cccc cc11 1111 1111 1111 + // --------------------------------------------- + // res1 & res2 & res3 = 0000 0000 ... 000c cccc ccbb bbbb baaa aaaa + // + // On x86-64, a shld from a single register filled with enough 1s in the high + // bits can accomplish all this in one instruction. It so happens that res1 + // has 57 high bits of ones, which is enough for the largest shift done. 
+ // + // Just as importantly, by keeping results in res1, res2, and res3, we take + // advantage of the superscalar abilities of the CPU. + ABSL_DCHECK_EQ(res1 >> 7, -1); + uint64_t ones = res1; // save the high 1 bits from res1 (input to SHLD) + int64_t res2, res3; // accumulated result chunks + + if (!shift_left_fill_with_ones_was_negative<1>(ptr[1], ones, res2)) + goto done2; + if (!shift_left_fill_with_ones_was_negative<2>(ptr[2], ones, res3)) + goto done3; + + // For the remainder of the chunks, check the sign of the AND result. + res2 &= shift_left_fill_with_ones<3>(ptr[3], ones); + if (res2 >= 0) goto done4; + res1 &= shift_left_fill_with_ones<4>(ptr[4], ones); + if (res1 >= 0) goto done5; + if (kIs64BitVarint) { + res2 &= shift_left_fill_with_ones<5>(ptr[5], ones); + if (res2 >= 0) goto done6; + res3 &= shift_left_fill_with_ones<6>(ptr[6], ones); + if (res3 >= 0) goto done7; + res1 &= shift_left_fill_with_ones<7>(ptr[7], ones); + if (res1 >= 0) goto done8; + res3 &= shift_left_fill_with_ones<8>(ptr[8], ones); + if (res3 >= 0) goto done9; + } else if (kIs32BitVarint) { + if (PROTOBUF_PREDICT_TRUE(!(ptr[5] & 0x80))) goto done6; + if (PROTOBUF_PREDICT_TRUE(!(ptr[6] & 0x80))) goto done7; + if (PROTOBUF_PREDICT_TRUE(!(ptr[7] & 0x80))) goto done8; + if (PROTOBUF_PREDICT_TRUE(!(ptr[8] & 0x80))) goto done9; + } + + // For valid 64bit varints, the 10th byte/ptr[9] should be exactly 1. In this + // case, the continuation bit of ptr[8] already set the top bit of res3 + // correctly, so all we have to do is check that the expected case is true. + if (PROTOBUF_PREDICT_TRUE(kIs64BitVarint && ptr[9] == 1)) goto done10; + + if (PROTOBUF_PREDICT_FALSE(ptr[9] & 0x80)) { + // If the continue bit is set, it is an unterminated varint. + return {nullptr, 0}; + } + + // A zero value of the first bit of the 10th byte represents an + // over-serialized varint. 
This case should not happen, but if does (say, due + // to a nonconforming serializer), deassert the continuation bit that came + // from ptr[8]. + if (kIs64BitVarint && (ptr[9] & 1) == 0) { +#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__) + // Use a small instruction since this is an uncommon code path. + asm("btcq $63,%0" : "+r"(res3)); +#else + res3 ^= static_cast(1) << 63; +#endif + } + goto done10; + +done2: + return {p + 2, res1 & res2}; +done3: + return {p + 3, res1 & res2 & res3}; +done4: + return {p + 4, res1 & res2 & res3}; +done5: + return {p + 5, res1 & res2 & res3}; +done6: + return {p + 6, res1 & res2 & res3}; +done7: + return {p + 7, res1 & res2 & res3}; +done8: + return {p + 8, res1 & res2 & res3}; +done9: + return {p + 9, res1 & res2 & res3}; +done10: + return {p + 10, res1 & res2 & res3}; +} + template inline PROTOBUF_ALWAYS_INLINE const char* ParseVarint(const char* p, Type* value) { @@ -792,7 +923,7 @@ template PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularVarint( PROTOBUF_TC_PARAM_DECL) { if (PROTOBUF_PREDICT_FALSE(data.coded_tag() != 0)) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } ptr += sizeof(TagType); // Consume tag hasbits |= (uint64_t{1} << data.hasbit_idx()); @@ -807,7 +938,7 @@ PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularVarint( RefAt(msg, data.offset()) = ZigZagDecodeHelper(static_cast(*ptr++)); - PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS); } template @@ -842,11 +973,41 @@ PROTOBUF_NOINLINE const char* TcParser::SingularVarBigint( hasbits = spill.hasbits; if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) { - return Error(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); } RefAt(msg, data.offset()) = ZigZagDecodeHelper(tmp); - PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS); + 
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS); +} + +template +PROTOBUF_ALWAYS_INLINE const char* TcParser::FastVarintS1( + PROTOBUF_TC_PARAM_DECL) { + using TagType = uint8_t; + // super-early success test... + if (PROTOBUF_PREDICT_TRUE(((data.data) & 0x80FF) == 0)) { + ptr += sizeof(TagType); // Consume tag + hasbits |= (uint64_t{1} << data.hasbit_idx()); + uint8_t value = data.data >> 8; + RefAt(msg, data.offset()) = value; + ptr += 1; + PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS); + } + if (PROTOBUF_PREDICT_FALSE(data.coded_tag() != 0)) { + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); + } + ptr += sizeof(TagType); // Consume tag + hasbits |= (uint64_t{1} << data.hasbit_idx()); + + auto tmp = + ParseFallbackPair(ptr, static_cast(data.data >> 8)); + ptr = tmp.first; + if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) { + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); + } + + RefAt(msg, data.offset()) = tmp.second; + PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS); } PROTOBUF_NOINLINE const char* TcParser::FastV8S1(PROTOBUF_TC_PARAM_DECL) { @@ -870,13 +1031,13 @@ PROTOBUF_NOINLINE const char* TcParser::FastV8S1(PROTOBUF_TC_PARAM_DECL) { ptr += sizeof(TagType) + 1; // Consume the tag and the value. hasbits |= (uint64_t{1} << data.hasbit_idx()); - PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS); } // If it didn't match above either the tag is wrong, or the value is encoded // non-canonically. // Jump to MiniParse as wrong tag is the most probable reason. 
- PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } PROTOBUF_NOINLINE const char* TcParser::FastV8S2(PROTOBUF_TC_PARAM_DECL) { @@ -884,16 +1045,14 @@ PROTOBUF_NOINLINE const char* TcParser::FastV8S2(PROTOBUF_TC_PARAM_DECL) { PROTOBUF_TC_PARAM_PASS); } PROTOBUF_NOINLINE const char* TcParser::FastV32S1(PROTOBUF_TC_PARAM_DECL) { - PROTOBUF_MUSTTAIL return SingularVarint( - PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return FastVarintS1(PROTOBUF_TC_PARAM_PASS); } PROTOBUF_NOINLINE const char* TcParser::FastV32S2(PROTOBUF_TC_PARAM_DECL) { PROTOBUF_MUSTTAIL return SingularVarint( PROTOBUF_TC_PARAM_PASS); } PROTOBUF_NOINLINE const char* TcParser::FastV64S1(PROTOBUF_TC_PARAM_DECL) { - PROTOBUF_MUSTTAIL return SingularVarint( - PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return FastVarintS1(PROTOBUF_TC_PARAM_PASS); } PROTOBUF_NOINLINE const char* TcParser::FastV64S2(PROTOBUF_TC_PARAM_DECL) { PROTOBUF_MUSTTAIL return SingularVarint( @@ -926,7 +1085,7 @@ PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedVarint( if (data.coded_tag() == 0) { return PackedVarint(PROTOBUF_TC_PARAM_PASS); } else { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } } auto& field = RefAt>(msg, data.offset()); @@ -936,14 +1095,14 @@ PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedVarint( FieldType tmp; ptr = ParseVarint(ptr, &tmp); if (ptr == nullptr) { - return Error(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); } field.Add(ZigZagDecodeHelper(tmp)); if (!ctx->DataAvailable(ptr)) { break; } } while (UnalignedLoad(ptr) == expected_tag); - return ToParseLoop(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS); } PROTOBUF_NOINLINE const char* TcParser::FastV8R1(PROTOBUF_TC_PARAM_DECL) { @@ -996,7 +1155,7 @@ const char* 
TcParser::PackedVarint(PROTOBUF_TC_PARAM_DECL) { if (data.coded_tag() == 0) { return RepeatedVarint(PROTOBUF_TC_PARAM_PASS); } else { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } } ptr += sizeof(TagType); @@ -1071,7 +1230,7 @@ PROTOBUF_NOINLINE const char* TcParser::FastUnknownEnumFallback( uint32_t tag; ptr = ReadTag(ptr, &tag); if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) { - return Error(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); } data.data = tag; PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS); @@ -1081,14 +1240,14 @@ template PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularEnum( PROTOBUF_TC_PARAM_DECL) { if (PROTOBUF_PREDICT_FALSE(data.coded_tag() != 0)) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } const char* ptr2 = ptr; // Save for unknown enum case ptr += sizeof(TagType); // Consume tag uint64_t tmp; ptr = ParseVarint(ptr, &tmp); if (ptr == nullptr) { - return Error(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); } const TcParseTableBase::FieldAux aux = *table->field_aux(data.aux_idx()); if (PROTOBUF_PREDICT_FALSE( @@ -1098,7 +1257,7 @@ PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularEnum( } hasbits |= (uint64_t{1} << data.hasbit_idx()); RefAt(msg, data.offset()) = tmp; - PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS); } PROTOBUF_NOINLINE const char* TcParser::FastErS1(PROTOBUF_TC_PARAM_DECL) { @@ -1126,7 +1285,7 @@ const char* TcParser::RepeatedEnum(PROTOBUF_TC_PARAM_DECL) { PROTOBUF_MUSTTAIL return PackedEnum( PROTOBUF_TC_PARAM_PASS); } else { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } 
} auto& field = RefAt>(msg, data.offset()); @@ -1138,7 +1297,7 @@ const char* TcParser::RepeatedEnum(PROTOBUF_TC_PARAM_DECL) { uint64_t tmp; ptr = ParseVarint(ptr, &tmp); if (ptr == nullptr) { - return Error(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); } if (PROTOBUF_PREDICT_FALSE( !EnumIsValidAux(static_cast(tmp), xform_val, aux))) { @@ -1152,7 +1311,7 @@ const char* TcParser::RepeatedEnum(PROTOBUF_TC_PARAM_DECL) { break; } } while (UnalignedLoad(ptr) == expected_tag); - return ToParseLoop(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS); } const TcParser::UnknownFieldOps& TcParser::GetUnknownFieldOps( @@ -1180,7 +1339,7 @@ const char* TcParser::PackedEnum(PROTOBUF_TC_PARAM_DECL) { PROTOBUF_MUSTTAIL return RepeatedEnum( PROTOBUF_TC_PARAM_PASS); } else { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } } const auto saved_tag = UnalignedLoad(ptr); @@ -1237,18 +1396,18 @@ template PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularEnumSmallRange( PROTOBUF_TC_PARAM_DECL) { if (PROTOBUF_PREDICT_FALSE(data.coded_tag() != 0)) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } uint8_t v = ptr[sizeof(TagType)]; if (PROTOBUF_PREDICT_FALSE(min > v || v > data.aux_idx())) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } RefAt(msg, data.offset()) = v; ptr += sizeof(TagType) + 1; hasbits |= (uint64_t{1} << data.hasbit_idx()); - PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS); } PROTOBUF_NOINLINE const char* TcParser::FastEr0S1(PROTOBUF_TC_PARAM_DECL) { @@ -1279,7 +1438,7 @@ const char* TcParser::RepeatedEnumSmallRange(PROTOBUF_TC_PARAM_DECL) { 
PROTOBUF_MUSTTAIL return PackedEnumSmallRange( PROTOBUF_TC_PARAM_PASS); } else { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } } auto& field = RefAt>(msg, data.offset()); @@ -1288,14 +1447,14 @@ const char* TcParser::RepeatedEnumSmallRange(PROTOBUF_TC_PARAM_DECL) { do { uint8_t v = ptr[sizeof(TagType)]; if (PROTOBUF_PREDICT_FALSE(min > v || v > max)) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } field.Add(static_cast(v)); ptr += sizeof(TagType) + 1; if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) break; } while (UnalignedLoad(ptr) == expected_tag); - PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS); } PROTOBUF_NOINLINE const char* TcParser::FastEr0R1(PROTOBUF_TC_PARAM_DECL) { @@ -1324,7 +1483,7 @@ const char* TcParser::PackedEnumSmallRange(PROTOBUF_TC_PARAM_DECL) { PROTOBUF_MUSTTAIL return RepeatedEnumSmallRange( PROTOBUF_TC_PARAM_PASS); } else { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } } @@ -1414,7 +1573,7 @@ template PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularString( PROTOBUF_TC_PARAM_DECL) { if (PROTOBUF_PREDICT_FALSE(data.coded_tag() != 0)) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } auto saved_tag = UnalignedLoad(ptr); ptr += sizeof(TagType); @@ -1427,20 +1586,24 @@ PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularString( } else { ptr = ReadStringNoArena(msg, ptr, ctx, data.aux_idx(), table, field); } - if (ptr == nullptr) return Error(PROTOBUF_TC_PARAM_PASS); + if (ptr == nullptr) { + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); + } switch (utf8) { case kNoUtf8: #ifdef NDEBUG case 
kUtf8ValidateOnly: #endif - return ToParseLoop(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS); default: if (PROTOBUF_PREDICT_TRUE(IsValidUTF8(field))) { - return ToParseLoop(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS); } ReportFastUtf8Error(FastDecodeTag(saved_tag), table); - return utf8 == kUtf8 ? Error(PROTOBUF_TC_PARAM_PASS) - : ToParseLoop(PROTOBUF_TC_PARAM_PASS); + if (utf8 == kUtf8) { + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); + } + PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS); } } @@ -1474,49 +1637,49 @@ PROTOBUF_NOINLINE const char* TcParser::FastUS2(PROTOBUF_TC_PARAM_DECL) { // Inlined string variants: const char* TcParser::FastBiS1(PROTOBUF_TC_PARAM_DECL) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } const char* TcParser::FastBiS2(PROTOBUF_TC_PARAM_DECL) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } const char* TcParser::FastSiS1(PROTOBUF_TC_PARAM_DECL) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } const char* TcParser::FastSiS2(PROTOBUF_TC_PARAM_DECL) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } const char* TcParser::FastUiS1(PROTOBUF_TC_PARAM_DECL) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } const char* TcParser::FastUiS2(PROTOBUF_TC_PARAM_DECL) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } // Corded string variants: const char* TcParser::FastBcS1(PROTOBUF_TC_PARAM_DECL) { - 
PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } const char* TcParser::FastBcS2(PROTOBUF_TC_PARAM_DECL) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } const char* TcParser::FastScS1(PROTOBUF_TC_PARAM_DECL) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } const char* TcParser::FastScS2(PROTOBUF_TC_PARAM_DECL) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } const char* TcParser::FastUcS1(PROTOBUF_TC_PARAM_DECL) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } const char* TcParser::FastUcS2(PROTOBUF_TC_PARAM_DECL) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } template PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedString( PROTOBUF_TC_PARAM_DECL) { if (PROTOBUF_PREDICT_FALSE(data.coded_tag() != 0)) { - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS); } const auto expected_tag = UnalignedLoad(ptr); auto& field = RefAt(msg, data.offset()); @@ -1549,7 +1712,7 @@ PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedString( ptr = ParseRepeatedStringOnce(ptr, arena, serial_arena, ctx, field); if (PROTOBUF_PREDICT_FALSE(ptr == nullptr || !validate_last_string())) { - return Error(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); } if (!ctx->DataAvailable(ptr)) break; } while (UnalignedLoad(ptr) == expected_tag); @@ -1559,12 +1722,12 @@ PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedString( std::string* str = field.Add(); ptr = 
InlineGreedyStringParser(str, ptr, ctx); if (PROTOBUF_PREDICT_FALSE(ptr == nullptr || !validate_last_string())) { - return Error(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); } if (!ctx->DataAvailable(ptr)) break; } while (UnalignedLoad(ptr) == expected_tag); } - return ToParseLoop(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS); } PROTOBUF_NOINLINE const char* TcParser::FastBR1(PROTOBUF_TC_PARAM_DECL) { @@ -1739,7 +1902,7 @@ PROTOBUF_NOINLINE const char* TcParser::MpFixed(PROTOBUF_TC_PARAM_DECL) { RefAt(base, entry.offset) = UnalignedLoad(ptr); ptr += sizeof(uint32_t); } - PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS); } PROTOBUF_NOINLINE const char* TcParser::MpRepeatedFixed( @@ -1788,7 +1951,7 @@ PROTOBUF_NOINLINE const char* TcParser::MpRepeatedFixed( } while (next_tag == decoded_tag); } - PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS); } PROTOBUF_NOINLINE const char* TcParser::MpPackedFixed(PROTOBUF_TC_PARAM_DECL) { @@ -1817,9 +1980,9 @@ PROTOBUF_NOINLINE const char* TcParser::MpPackedFixed(PROTOBUF_TC_PARAM_DECL) { } if (ptr == nullptr) { - return Error(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); } - PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS); } template @@ -1844,7 +2007,9 @@ PROTOBUF_NOINLINE const char* TcParser::MpVarint(PROTOBUF_TC_PARAM_DECL) { const char* ptr2 = ptr; // save for unknown enum case uint64_t tmp; ptr = ParseVarint(ptr, &tmp); - if (ptr == nullptr) return Error(PROTOBUF_TC_PARAM_PASS); + if (ptr == nullptr) { + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); + } // Transform and/or validate the value uint16_t rep = 
type_card & field_layout::kRepMask; @@ -1881,7 +2046,7 @@ PROTOBUF_NOINLINE const char* TcParser::MpVarint(PROTOBUF_TC_PARAM_DECL) { RefAt(base, entry.offset) = static_cast(tmp); } - PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS); } PROTOBUF_NOINLINE const char* TcParser::MpRepeatedVarint( @@ -1911,11 +2076,15 @@ PROTOBUF_NOINLINE const char* TcParser::MpRepeatedVarint( do { uint64_t tmp; ptr = ParseVarint(ptr2, &tmp); - if (ptr == nullptr) return Error(PROTOBUF_TC_PARAM_PASS); + if (ptr == nullptr) { + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); + } field.Add(is_zigzag ? WireFormatLite::ZigZagDecode64(tmp) : tmp); if (!ctx->DataAvailable(ptr)) break; ptr2 = ReadTag(ptr, &next_tag); - if (ptr2 == nullptr) return Error(PROTOBUF_TC_PARAM_PASS); + if (ptr2 == nullptr) { + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); + } } while (next_tag == decoded_tag); } else if (rep == field_layout::kRep32Bits) { auto& field = RefAt>(msg, entry.offset); @@ -1924,7 +2093,9 @@ PROTOBUF_NOINLINE const char* TcParser::MpRepeatedVarint( do { uint64_t tmp; ptr = ParseVarint(ptr2, &tmp); - if (ptr == nullptr) return Error(PROTOBUF_TC_PARAM_PASS); + if (ptr == nullptr) { + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); + } if (is_validated_enum) { if (!EnumIsValidAux(tmp, xform_val, *table->field_aux(&entry))) { ptr = ptr2; @@ -1936,7 +2107,9 @@ PROTOBUF_NOINLINE const char* TcParser::MpRepeatedVarint( field.Add(tmp); if (!ctx->DataAvailable(ptr)) break; ptr2 = ReadTag(ptr, &next_tag); - if (ptr2 == nullptr) return Error(PROTOBUF_TC_PARAM_PASS); + if (ptr2 == nullptr) { + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); + } } while (next_tag == decoded_tag); } else { ABSL_DCHECK_EQ(rep, static_cast(field_layout::kRep8Bits)); @@ -1946,15 +2119,19 @@ PROTOBUF_NOINLINE const char* TcParser::MpRepeatedVarint( do { uint64_t tmp; ptr = 
ParseVarint(ptr2, &tmp); - if (ptr == nullptr) return Error(PROTOBUF_TC_PARAM_PASS); + if (ptr == nullptr) { + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); + } field.Add(static_cast(tmp)); if (!ctx->DataAvailable(ptr)) break; ptr2 = ReadTag(ptr, &next_tag); - if (ptr2 == nullptr) return Error(PROTOBUF_TC_PARAM_PASS); + if (ptr2 == nullptr) { + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); + } } while (next_tag == decoded_tag); } - PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS); } PROTOBUF_NOINLINE const char* TcParser::MpPackedVarint(PROTOBUF_TC_PARAM_DECL) { @@ -2005,7 +2182,7 @@ PROTOBUF_NOINLINE const char* TcParser::MpPackedVarint(PROTOBUF_TC_PARAM_DECL) { ptr, [field](uint64_t value) { field->Add(value); }); } - return Error(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); } bool TcParser::MpVerifyUtf8(absl::string_view wire_bytes, @@ -2083,9 +2260,9 @@ PROTOBUF_NOINLINE const char* TcParser::MpString(PROTOBUF_TC_PARAM_DECL) { } if (ptr == nullptr || !is_valid) { - return Error(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); } - return ToParseLoop(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS); } PROTOBUF_ALWAYS_INLINE const char* TcParser::ParseRepeatedStringOnce( @@ -2132,7 +2309,7 @@ PROTOBUF_NOINLINE const char* TcParser::MpRepeatedString( if (PROTOBUF_PREDICT_FALSE(ptr == nullptr || !MpVerifyUtf8(field[field.size() - 1], table, entry, xform_val))) { - return Error(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); } if (!ctx->DataAvailable(ptr)) break; ptr2 = ReadTag(ptr, &next_tag); @@ -2145,7 +2322,7 @@ PROTOBUF_NOINLINE const char* TcParser::MpRepeatedString( if (PROTOBUF_PREDICT_FALSE( ptr == nullptr || !MpVerifyUtf8(*str, table, entry, xform_val))) { - 
return Error(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS); } if (!ctx->DataAvailable(ptr)) break; ptr2 = ReadTag(ptr, &next_tag); @@ -2162,7 +2339,7 @@ PROTOBUF_NOINLINE const char* TcParser::MpRepeatedString( #endif } - return ToParseLoop(PROTOBUF_TC_PARAM_PASS); + PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS); } template diff --git a/src/google/protobuf/generated_message_tctable_lite_test.cc b/src/google/protobuf/generated_message_tctable_lite_test.cc index 0d923aeb4a..8aeacf57a2 100644 --- a/src/google/protobuf/generated_message_tctable_lite_test.cc +++ b/src/google/protobuf/generated_message_tctable_lite_test.cc @@ -125,8 +125,7 @@ TEST(FastVarints, NameHere) { // clang-format on uint8_t serialize_buffer[64]; - // TODO(b/27721823): cleanup test cases for 'former' TV* functions. - for (int size : {8, 32, 64, -8, -32, -64}) { + for (int size : {8, 32, 64}) { SCOPED_TRACE(size); auto next_i = [](uint64_t i) { // if i + 1 is a power of two, return that. 
@@ -193,21 +192,12 @@ TEST(FastVarints, NameHere) { case 8: fn = &TcParser::FastV8S1; break; - case -8: - fn = &TcParser::FastV8S1; - break; case 32: fn = &TcParser::FastV32S1; break; - case -32: - fn = &TcParser::FastTV32S1; - break; case 64: fn = &TcParser::FastV64S1; break; - case -64: - fn = &TcParser::FastTV64S1; - break; } fallback_ptr_received = absl::nullopt; fallback_hasbits_received = absl::nullopt; @@ -216,7 +206,6 @@ TEST(FastVarints, NameHere) { Xor2SerializedBytes(parse_table.fast_entries[0].bits, ptr), &parse_table.header, /*hasbits=*/0); switch (size) { - case -8: case 8: { if (end_ptr == nullptr) { // If end_ptr is nullptr, that means the FastParser gave up and @@ -241,7 +230,6 @@ TEST(FastVarints, NameHere) { EXPECT_EQ(actual_field, static_cast(i)) // << " hex: " << absl::StrCat(absl::Hex(actual_field)); }; break; - case -32: case 32: { ASSERT_TRUE(end_ptr); ASSERT_EQ(end_ptr - ptr, serialized.size()); @@ -250,7 +238,6 @@ TEST(FastVarints, NameHere) { EXPECT_EQ(actual_field, static_cast(i)) // << " hex: " << absl::StrCat(absl::Hex(actual_field)); }; break; - case -64: case 64: { ASSERT_EQ(end_ptr - ptr, serialized.size()); diff --git a/src/google/protobuf/port_def.inc b/src/google/protobuf/port_def.inc index c38f7a218d..d87b6b9929 100644 --- a/src/google/protobuf/port_def.inc +++ b/src/google/protobuf/port_def.inc @@ -856,6 +856,19 @@ static_assert(PROTOBUF_CPLUSPLUS_MIN(201402L), "Protobuf only supports C++14 and // PROTOBUF_TC_PARAM_PASS passes values to match PROTOBUF_TC_PARAM_DECL. #define PROTOBUF_TC_PARAM_PASS msg, ptr, ctx, data, table, hasbits +// PROTOBUF_TC_PARAM_NO_DATA_DECL and PROTOBUF_TC_PARAM_NO_DATA_PASS provide the +// exact same ABI as above, except that they don't name or pass the `data` +// argument. Specific functions such as `Error()` and `ToTagDispatch()` don't +// use the `data` argument. 
By not passing `data` down the call stack, we free +// up the register holding that value, which may matter in highly optimized +// functions such as varint parsing. +#define PROTOBUF_TC_PARAM_NO_DATA_DECL \ + ::google::protobuf::MessageLite *msg, const char *ptr, \ + ::google::protobuf::internal::ParseContext *ctx, ::google::protobuf::internal::TcFieldData, \ + const ::google::protobuf::internal::TcParseTableBase *table, uint64_t hasbits +#define PROTOBUF_TC_PARAM_NO_DATA_PASS \ + msg, ptr, ctx, ::google::protobuf::internal::TcFieldData::DefaultInit(), table, hasbits + #ifdef PROTOBUF_UNUSED #error PROTOBUF_UNUSED was previously defined #endif