From 8cfd02f590cf9455c22a5812a41dce0663e2e48a Mon Sep 17 00:00:00 2001 From: Protobuf Team Bot Date: Mon, 13 Feb 2023 13:37:38 -0800 Subject: [PATCH] Add tracing for limit push/pop in sanitizer mode to make sure we never read an uninitialized one, and we never fail to pop one. Fix a couple of cases where invalid input lead to undefined behavior. PiperOrigin-RevId: 509319815 --- src/google/protobuf/message_unittest.inc | 25 +++++++ src/google/protobuf/parse_context.cc | 15 ++--- src/google/protobuf/parse_context.h | 86 ++++++++++++++++++------ 3 files changed, 96 insertions(+), 30 deletions(-) diff --git a/src/google/protobuf/message_unittest.inc b/src/google/protobuf/message_unittest.inc index 204da69b48..c41d8b3d0e 100644 --- a/src/google/protobuf/message_unittest.inc +++ b/src/google/protobuf/message_unittest.inc @@ -1166,6 +1166,31 @@ TEST(MESSAGE_TEST_NAME, PreservesFloatingPointNegative0) { std::signbit(out_message.optional_double())); } +TEST(MESSAGE_TEST_NAME, + RegressionTestForParseMessageReadingUninitializedLimit) { + UNITTEST::TestAllTypes in_message; + in_message.mutable_optional_nested_message(); + std::string serialized = in_message.SerializeAsString(); + // We expect this to have 3 bytes: two for the tag, and one for the zero size. + // Break the size by making it overlong. + ASSERT_EQ(serialized.size(), 3); + serialized.back() = '\200'; + serialized += std::string(10, '\200'); + EXPECT_FALSE(in_message.ParseFromString(serialized)); +} + +TEST(MESSAGE_TEST_NAME, + RegressionTestForParseMessageWithSizeBeyondInputFailsToPopLimit) { + UNITTEST::TestAllTypes in_message; + in_message.mutable_optional_nested_message(); + std::string serialized = in_message.SerializeAsString(); + // We expect this to have 3 bytes: two for the tag, and one for the zero size. + // Make the size a valid varint, but it overflows in the input. + ASSERT_EQ(serialized.size(), 3); + serialized.back() = 10; + EXPECT_FALSE(in_message.ParseFromString(serialized)); +} + const uint8_t* SkipTag(const uint8_t* buf) { while (*buf & 0x80) ++buf; ++buf; diff --git a/src/google/protobuf/parse_context.cc b/src/google/protobuf/parse_context.cc index 11d21819fc..b96e7ac74a 100644 --- a/src/google/protobuf/parse_context.cc +++ b/src/google/protobuf/parse_context.cc @@ -319,26 +319,19 @@ const char* EpsCopyInputStream::InitFrom(io::ZeroCopyInputStream* zcis) { } const char* ParseContext::ReadSizeAndPushLimitAndDepth(const char* ptr, - int* old_limit) { - int size = ReadSize(&ptr); - if (PROTOBUF_PREDICT_FALSE(!ptr) || depth_ <= 0) { - *old_limit = 0; // Make sure this isn't uninitialized even on error return - return nullptr; - } - *old_limit = PushLimit(ptr, size); - --depth_; - return ptr; + LimitToken* old_limit) { + return ReadSizeAndPushLimitAndDepthInlined(ptr, old_limit); } const char* ParseContext::ParseMessage(MessageLite* msg, const char* ptr) { - int old; + LimitToken old; ptr = ReadSizeAndPushLimitAndDepth(ptr, &old); if (ptr == nullptr) return ptr; auto old_depth = depth_; ptr = msg->_InternalParse(ptr, this); if (ptr != nullptr) ABSL_DCHECK_EQ(old_depth, depth_); depth_++; - if (!PopLimit(old)) return nullptr; + if (!PopLimit(std::move(old))) return nullptr; return ptr; } diff --git a/src/google/protobuf/parse_context.h b/src/google/protobuf/parse_context.h index e5b13a027e..0ddaa2604b 100644 --- a/src/google/protobuf/parse_context.h +++ b/src/google/protobuf/parse_context.h @@ -36,11 +36,13 @@ #include #include +#include "absl/base/config.h" #include "absl/log/absl_check.h" #include "absl/log/absl_log.h" #include "absl/strings/cord.h" #include "absl/strings/internal/resize_uninitialized.h" #include "absl/strings/string_view.h" +#include "absl/types/optional.h" #include "google/protobuf/arena.h" #include "google/protobuf/arenastring.h" #include "google/protobuf/endian.h" @@ -131,8 +133,53 @@ class PROTOBUF_EXPORT EpsCopyInputStream { if (count > 0) StreamBackUp(count); } +#if defined(ABSL_HAVE_ADDRESS_SANITIZER) || defined(ABSL_HAVE_MEMORY_SANITIZER) + // In sanitizer mode we use an optional to guarantee that: + // - We do not read an uninitialized token. + // - Every non-empty token is moved from and consumed. + class LimitToken { + public: + LimitToken() = default; + explicit LimitToken(int token) : token_(token) {} + LimitToken(LimitToken&& other) { *this = std::move(other); } + LimitToken& operator=(LimitToken&& other) { + token_ = std::exchange(other.token_, absl::nullopt); + return *this; + } + + ~LimitToken() { ABSL_CHECK(!token_.has_value()); } + + LimitToken(const LimitToken&) = delete; + LimitToken& operator=(const LimitToken&) = delete; + + int token() && { + ABSL_CHECK(token_.has_value()); + return *std::exchange(token_, absl::nullopt); + } + + private: + absl::optional token_; + }; +#else + class LimitToken { + public: + LimitToken() = default; + explicit LimitToken(int token) : token_(token) {} + LimitToken(LimitToken&&) = default; + LimitToken& operator=(LimitToken&&) = default; + + LimitToken(const LimitToken&) = delete; + LimitToken& operator=(const LimitToken&) = delete; + + int token() const { return token_; } + + private: + int token_; + }; +#endif + // If return value is negative it's an error - PROTOBUF_NODISCARD int PushLimit(const char* ptr, int limit) { + PROTOBUF_NODISCARD LimitToken PushLimit(const char* ptr, int limit) { ABSL_DCHECK(limit >= 0 && limit <= INT_MAX - kSlopBytes); // This add is safe due to the invariant above, because // ptr - buffer_end_ <= kSlopBytes. @@ -140,12 +187,14 @@ class PROTOBUF_EXPORT EpsCopyInputStream { limit_end_ = buffer_end_ + (std::min)(0, limit); auto old_limit = limit_; limit_ = limit; - return old_limit - limit; + return LimitToken(old_limit - limit); } - PROTOBUF_NODISCARD bool PopLimit(int delta) { + PROTOBUF_NODISCARD bool PopLimit(LimitToken delta) { + // We must update the limit first before the early return. Otherwise, we can + // end up with an invalid limit and it can lead to integer overflows. + limit_ = limit_ + std::move(delta).token(); if (PROTOBUF_PREDICT_FALSE(!EndedAtLimit())) return false; - limit_ = limit_ + delta; // TODO(gerbens) We could remove this line and hoist the code to // DoneFallback. Study the perf/bin-size effects. limit_end_ = buffer_end_ + (std::min)(0, limit_); @@ -470,13 +519,14 @@ class PROTOBUF_EXPORT ParseContext : public EpsCopyInputStream { template PROTOBUF_NODISCARD PROTOBUF_ALWAYS_INLINE const char* ParseMessage( MessageLite* msg, const char* ptr, const Table* table) { - int old; + LimitToken old; ptr = ReadSizeAndPushLimitAndDepthInlined(ptr, &old); + if (ptr == nullptr) return ptr; auto old_depth = depth_; - ptr = ptr ? TcParser::ParseLoop(msg, ptr, this, table) : nullptr; + ptr = TcParser::ParseLoop(msg, ptr, this, table); if (ptr != nullptr) ABSL_DCHECK_EQ(old_depth, depth_); depth_++; - if (!PopLimit(old)) return nullptr; + if (!PopLimit(std::move(old))) return nullptr; return ptr; } @@ -518,20 +568,20 @@ class PROTOBUF_EXPORT ParseContext : public EpsCopyInputStream { private: // Out-of-line routine to save space in ParseContext::ParseMessage - // int old; + // LimitToken old; // ptr = ReadSizeAndPushLimitAndDepth(ptr, &old) // is equivalent to: // int size = ReadSize(&ptr); // if (!ptr) return nullptr; - // int old = PushLimit(ptr, size); + // LimitToken old = PushLimit(ptr, size); // if (--depth_ < 0) return nullptr; - PROTOBUF_NODISCARD const char* ReadSizeAndPushLimitAndDepth(const char* ptr, - int* old_limit); + PROTOBUF_NODISCARD const char* ReadSizeAndPushLimitAndDepth( + const char* ptr, LimitToken* old_limit); // As above, but fully inlined for the cases where we care about performance // more than size. eg TcParser. PROTOBUF_NODISCARD PROTOBUF_ALWAYS_INLINE const char* - ReadSizeAndPushLimitAndDepthInlined(const char* ptr, int* old_limit); + ReadSizeAndPushLimitAndDepthInlined(const char* ptr, LimitToken* old_limit); // The context keeps an internal stack to keep track of the recursive // part of the parse state. @@ -881,27 +931,25 @@ template ::value, bool>::type> PROTOBUF_NODISCARD const char* ParseContext::ParseMessage(T* msg, const char* ptr) { - int old; + LimitToken old; ptr = ReadSizeAndPushLimitAndDepth(ptr, &old); if (ptr == nullptr) return ptr; auto old_depth = depth_; ptr = msg->_InternalParse(ptr, this); if (ptr != nullptr) ABSL_DCHECK_EQ(old_depth, depth_); depth_++; - if (!PopLimit(old)) return nullptr; + if (!PopLimit(std::move(old))) return nullptr; return ptr; } inline const char* ParseContext::ReadSizeAndPushLimitAndDepthInlined( - const char* ptr, int* old_limit) { + const char* ptr, LimitToken* old_limit) { int size = ReadSize(&ptr); - if (PROTOBUF_PREDICT_FALSE(!ptr)) { - // Make sure this isn't uninitialized even on error return - *old_limit = 0; + if (PROTOBUF_PREDICT_FALSE(!ptr) || depth_ <= 0) { return nullptr; } *old_limit = PushLimit(ptr, size); - if (--depth_ < 0) return nullptr; + --depth_; return ptr; }