diff --git a/csharp/src/Google.Protobuf.Test/testprotos.pb b/csharp/src/Google.Protobuf.Test/testprotos.pb
index e1cef477c3..106ada82c7 100644
Binary files a/csharp/src/Google.Protobuf.Test/testprotos.pb and b/csharp/src/Google.Protobuf.Test/testprotos.pb differ
diff --git a/src/google/protobuf/parse_context.cc b/src/google/protobuf/parse_context.cc
index 393d0884d7..41eae4c60b 100644
--- a/src/google/protobuf/parse_context.cc
+++ b/src/google/protobuf/parse_context.cc
@@ -30,6 +30,8 @@
 
 #include "google/protobuf/parse_context.h"
 
+#include <cstring>
+
 #include "absl/strings/string_view.h"
 #include "google/protobuf/message_lite.h"
 #include "google/protobuf/repeated_field.h"
@@ -44,10 +46,9 @@ namespace google {
 namespace protobuf {
 namespace internal {
 
-namespace {
-
 // Only call if at start of tag.
-bool ParseEndsInSlopRegion(const char* begin, int overrun, int depth) {
+bool EpsCopyInputStream::ParseEndsInSlopRegion(const char* begin, int overrun,
+                                               int depth) {
   constexpr int kSlopBytes = EpsCopyInputStream::kSlopBytes;
   GOOGLE_ABSL_DCHECK_GE(overrun, 0);
   GOOGLE_ABSL_DCHECK_LE(overrun, kSlopBytes);
@@ -96,42 +97,40 @@ bool ParseEndsInSlopRegion(const char* begin, int overrun, int depth) {
   return false;
 }
 
-}  // namespace
-
 const char* EpsCopyInputStream::NextBuffer(int overrun, int depth) {
   if (next_chunk_ == nullptr) return nullptr;  // We've reached end of stream.
-  if (next_chunk_ != buffer_) {
+  if (next_chunk_ != patch_buffer_) {
     GOOGLE_ABSL_DCHECK(size_ > kSlopBytes);
     // The chunk is large enough to be used directly
     buffer_end_ = next_chunk_ + size_ - kSlopBytes;
     auto res = next_chunk_;
-    next_chunk_ = buffer_;
+    next_chunk_ = patch_buffer_;
     if (aliasing_ == kOnPatch) aliasing_ = kNoDelta;
     return res;
   }
   // Move the slop bytes of previous buffer to start of the patch buffer.
   // Note we must use memmove because the previous buffer could be part of
-  // buffer_.
-  std::memmove(buffer_, buffer_end_, kSlopBytes);
+  // patch_buffer_.
+  std::memmove(patch_buffer_, buffer_end_, kSlopBytes);
   if (overall_limit_ > 0 &&
-      (depth < 0 || !ParseEndsInSlopRegion(buffer_, overrun, depth))) {
+      (depth < 0 || !ParseEndsInSlopRegion(patch_buffer_, overrun, depth))) {
     const void* data;
     // ZeroCopyInputStream indicates Next may return 0 size buffers. Hence
     // we loop.
     while (StreamNext(&data)) {
       if (size_ > kSlopBytes) {
         // We got a large chunk
-        std::memcpy(buffer_ + kSlopBytes, data, kSlopBytes);
+        std::memcpy(patch_buffer_ + kSlopBytes, data, kSlopBytes);
         next_chunk_ = static_cast<const char*>(data);
-        buffer_end_ = buffer_ + kSlopBytes;
+        buffer_end_ = patch_buffer_ + kSlopBytes;
         if (aliasing_ >= kNoDelta) aliasing_ = kOnPatch;
-        return buffer_;
+        return patch_buffer_;
       } else if (size_ > 0) {
-        std::memcpy(buffer_ + kSlopBytes, data, size_);
-        next_chunk_ = buffer_;
-        buffer_end_ = buffer_ + size_;
+        std::memcpy(patch_buffer_ + kSlopBytes, data, size_);
+        next_chunk_ = patch_buffer_;
+        buffer_end_ = patch_buffer_ + size_;
         if (aliasing_ >= kNoDelta) aliasing_ = kOnPatch;
-        return buffer_;
+        return patch_buffer_;
       }
       GOOGLE_ABSL_DCHECK(size_ == 0) << size_;
     }
@@ -145,12 +144,12 @@ const char* EpsCopyInputStream::NextBuffer(int overrun, int depth) {
     // array. This guarantees string_view's are always aliased if parsed from
     // an array.
     aliasing_ = reinterpret_cast<std::uintptr_t>(buffer_end_) -
-                reinterpret_cast<std::uintptr_t>(buffer_);
+                reinterpret_cast<std::uintptr_t>(patch_buffer_);
   }
   next_chunk_ = nullptr;
-  buffer_end_ = buffer_ + kSlopBytes;
+  buffer_end_ = patch_buffer_ + kSlopBytes;
   size_ = 0;
-  return buffer_;
+  return patch_buffer_;
 }
 
 const char* EpsCopyInputStream::Next() {
@@ -244,13 +243,13 @@ const char* EpsCopyInputStream::InitFrom(io::ZeroCopyInputStream* zcis) {
       auto ptr = static_cast<const char*>(data);
       limit_ -= size - kSlopBytes;
       limit_end_ = buffer_end_ = ptr + size - kSlopBytes;
-      next_chunk_ = buffer_;
+      next_chunk_ = patch_buffer_;
       if (aliasing_ == kOnPatch) aliasing_ = kNoDelta;
       return ptr;
     } else {
-      limit_end_ = buffer_end_ = buffer_ + kSlopBytes;
-      next_chunk_ = buffer_;
-      auto ptr = buffer_ + 2 * kSlopBytes - size;
+      limit_end_ = buffer_end_ = patch_buffer_ + kSlopBytes;
+      next_chunk_ = patch_buffer_;
+      auto ptr = patch_buffer_ + kPatchBufferSize - size;
       std::memcpy(ptr, data, size);
       return ptr;
     }
@@ -258,8 +257,8 @@ const char* EpsCopyInputStream::InitFrom(io::ZeroCopyInputStream* zcis) {
   overall_limit_ = 0;
   next_chunk_ = nullptr;
   size_ = 0;
-  limit_end_ = buffer_end_ = buffer_;
-  return buffer_;
+  limit_end_ = buffer_end_ = patch_buffer_;
+  return patch_buffer_;
 }
 
 const char* ParseContext::ReadSizeAndPushLimitAndDepth(const char* ptr,
diff --git a/src/google/protobuf/parse_context.h b/src/google/protobuf/parse_context.h
index 8e2c6b0600..e79d238617 100644
--- a/src/google/protobuf/parse_context.h
+++ b/src/google/protobuf/parse_context.h
@@ -115,15 +115,14 @@ inline void WriteLengthDelimited(uint32_t num, absl::string_view val,
 
 class PROTOBUF_EXPORT EpsCopyInputStream {
  public:
-  enum { kSlopBytes = 16, kMaxCordBytesToCopy = 512 };
-
+  enum { kMaxCordBytesToCopy = 512 };
   explicit EpsCopyInputStream(bool enable_aliasing)
       : aliasing_(enable_aliasing ? kOnPatch : kNoAliasing) {}
 
   void BackUp(const char* ptr) {
     GOOGLE_ABSL_DCHECK(ptr <= buffer_end_ + kSlopBytes);
     int count;
-    if (next_chunk_ == buffer_) {
+    if (next_chunk_ == patch_buffer_) {
       count = static_cast<int>(buffer_end_ + kSlopBytes - ptr);
     } else {
       count = size_ + static_cast<int>(buffer_end_ - ptr);
@@ -248,21 +247,21 @@ class PROTOBUF_EXPORT EpsCopyInputStream {
     if (flat.size() > kSlopBytes) {
       limit_ = kSlopBytes;
       limit_end_ = buffer_end_ = flat.data() + flat.size() - kSlopBytes;
-      next_chunk_ = buffer_;
+      next_chunk_ = patch_buffer_;
       if (aliasing_ == kOnPatch) aliasing_ = kNoDelta;
       return flat.data();
     } else {
       if (!flat.empty()) {
-        std::memcpy(buffer_, flat.data(), flat.size());
+        std::memcpy(patch_buffer_, flat.data(), flat.size());
       }
       limit_ = 0;
-      limit_end_ = buffer_end_ = buffer_ + flat.size();
+      limit_end_ = buffer_end_ = patch_buffer_ + flat.size();
       next_chunk_ = nullptr;
       if (aliasing_ == kOnPatch) {
         aliasing_ = reinterpret_cast<std::uintptr_t>(flat.data()) -
-                    reinterpret_cast<std::uintptr_t>(buffer_);
+                    reinterpret_cast<std::uintptr_t>(patch_buffer_);
       }
-      return buffer_;
+      return patch_buffer_;
     }
   }
 
@@ -278,13 +277,19 @@ class PROTOBUF_EXPORT EpsCopyInputStream {
   }
 
  private:
+  enum { kSlopBytes = 16, kPatchBufferSize = 32 };
+  static_assert(kPatchBufferSize >= kSlopBytes * 2,
+                "Patch buffer needs to be at least large enough to hold all "
+                "the slop bytes from the previous buffer, plus the first "
+                "kSlopBytes from the next buffer.");
+
   const char* limit_end_;  // buffer_end_ + min(limit_, 0)
   const char* buffer_end_;
   const char* next_chunk_;
   int size_;
   int limit_;  // relative to buffer_end_;
   io::ZeroCopyInputStream* zcis_ = nullptr;
-  char buffer_[2 * kSlopBytes] = {};
+  char patch_buffer_[kPatchBufferSize] = {};
   enum { kNoAliasing = 0, kOnPatch = 1, kNoDelta = 2 };
   std::uintptr_t aliasing_ = kNoAliasing;
   // This variable is used to communicate how the parse ended, in order to
@@ -329,6 +334,7 @@ class PROTOBUF_EXPORT EpsCopyInputStream {
   const char* SkipFallback(const char* ptr, int size);
   const char* AppendStringFallback(const char* ptr, int size, std::string* str);
   const char* ReadStringFallback(const char* ptr, int size, std::string* str);
+  static bool ParseEndsInSlopRegion(const char* begin, int overrun, int depth);
   bool StreamNext(const void** data) {
     bool res = zcis_->Next(data, &size_);
     if (res) overall_limit_ -= size_;
@@ -388,6 +394,10 @@ class PROTOBUF_EXPORT EpsCopyInputStream {
         ptr, [str](const char* p, ptrdiff_t s) { str->append(p, s); });
   }
   friend class ImplicitWeakMessage;
+
+  // Needs access to kSlopBytes.
+  friend PROTOBUF_EXPORT std::pair<const char*, uint32_t> ReadSizeFallback(
+      const char* p, uint32_t res);
 };
 
 using LazyEagerVerifyFnType = const char* (*)(const char* ptr,
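
Not part of the patch above: a minimal standalone sketch of the invariant that the new static_assert documents. The names DemoPatchBuffer and Refill below are hypothetical; only kSlopBytes and kPatchBufferSize mirror the constants in EpsCopyInputStream. When a chunk boundary is crossed, the last kSlopBytes of the previous buffer are moved to the front of the patch buffer and the first kSlopBytes of the next chunk are copied right behind them, which is why the patch buffer must hold at least 2 * kSlopBytes.

// Standalone sketch, assuming the slop-byte scheme described by the
// static_assert in the diff; not the real EpsCopyInputStream implementation.
#include <cassert>
#include <cstring>

namespace demo {

constexpr int kSlopBytes = 16;
constexpr int kPatchBufferSize = 2 * kSlopBytes;

struct DemoPatchBuffer {
  char patch[kPatchBufferSize] = {};

  // Builds the patch region when a chunk boundary is crossed: the last
  // kSlopBytes of the previous chunk are moved to the front, and up to
  // kSlopBytes from the next chunk are appended right after them. A parser
  // positioned near the end of the previous chunk can then keep reading
  // kSlopBytes past the logical end without per-byte bounds checks.
  const char* Refill(const char* prev_end, const char* next, int next_size) {
    std::memmove(patch, prev_end, kSlopBytes);
    int n = next_size < kSlopBytes ? next_size : kSlopBytes;
    std::memcpy(patch + kSlopBytes, next, n);
    return patch;  // parsing resumes at the copied slop bytes
  }
};

}  // namespace demo

int main() {
  char prev[64];
  char next[64];
  std::memset(prev, 'a', sizeof(prev));
  std::memset(next, 'b', sizeof(next));

  demo::DemoPatchBuffer buf;
  const char* p =
      buf.Refill(prev + sizeof(prev) - demo::kSlopBytes, next, sizeof(next));
  // First half mirrors the tail of the previous chunk, second half the head
  // of the next chunk.
  assert(p[0] == 'a' && p[demo::kSlopBytes] == 'b');
  return 0;
}

EpsCopyInputStream::NextBuffer in the .cc hunk above performs the same memmove/memcpy dance on patch_buffer_, with the additional aliasing bookkeeping and handling of zero-size chunks from the ZeroCopyInputStream.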