From c8c7ad39a0070113231c2368a72971408f3c2a79 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Tue, 23 May 2023 15:08:21 -0700 Subject: [PATCH] Avoid int32 overflow in PackedEnumSmallRange. PiperOrigin-RevId: 534572557 --- src/google/protobuf/BUILD.bazel | 3 +- .../generated_message_tctable_lite.cc | 7 +- .../generated_message_tctable_lite_test.cc | 79 +++++++++++++++++++ src/google/protobuf/unittest.proto | 10 +++ 4 files changed, 97 insertions(+), 2 deletions(-) diff --git a/src/google/protobuf/BUILD.bazel b/src/google/protobuf/BUILD.bazel index 331954d408..06b4a98b4b 100644 --- a/src/google/protobuf/BUILD.bazel +++ b/src/google/protobuf/BUILD.bazel @@ -1085,6 +1085,7 @@ cc_test( ], }), deps = [ + ":cc_test_protos", ":protobuf_lite", "@com_google_googletest//:gtest", "@com_google_googletest//:gtest_main", @@ -1453,8 +1454,8 @@ cc_test( name = "descriptor_visitor_test", srcs = ["descriptor_visitor_test.cc"], deps = [ - ":protobuf", ":cc_test_protos", + ":protobuf", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest", "@com_google_googletest//:gtest_main", diff --git a/src/google/protobuf/generated_message_tctable_lite.cc b/src/google/protobuf/generated_message_tctable_lite.cc index 3e1c33f097..ee21a426ec 100644 --- a/src/google/protobuf/generated_message_tctable_lite.cc +++ b/src/google/protobuf/generated_message_tctable_lite.cc @@ -28,7 +28,9 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#include #include +#include #include #include #include @@ -1404,7 +1406,10 @@ const char* TcParser::PackedEnumSmallRange(PROTOBUF_TC_PARAM_DECL) { // For enums that fit in one varint byte, optimistically assume that all // the values are one byte long (i.e. no large unknown values). If so, // we know exactly how many values we're going to get. - field->Reserve(field->size() + size_bytes); + int64_t new_size = + std::min(int64_t{field->size()} + size_bytes, + int64_t{std::numeric_limits::max()}); + field->Reserve(static_cast(new_size)); }); } diff --git a/src/google/protobuf/generated_message_tctable_lite_test.cc b/src/google/protobuf/generated_message_tctable_lite_test.cc index 12aeced544..26989d07ce 100644 --- a/src/google/protobuf/generated_message_tctable_lite_test.cc +++ b/src/google/protobuf/generated_message_tctable_lite_test.cc @@ -34,6 +34,7 @@ #include #include #include "absl/types/optional.h" +#include "google/protobuf/unittest.pb.h" #include "google/protobuf/wire_format_lite.h" namespace google { @@ -820,6 +821,84 @@ TEST_F(FindFieldEntryTest, BigMessage) { } } +TEST(GeneratedMessageTctableLiteTest, PackedEnumSmallRange) { + // RepeatedField::Reserve(n) may set the capacity to something greater than n, + // and this "upgrade" algorithm can even be different depending on compiler + // optimizations! This value is chosen such that -- with the current + // implementation of Reserve() -- Reserve(kNumVals) always results in a + // different final capacity than you'd get by adding elements one at a time. + constexpr int kNumVals = 1023; + protobuf_unittest::TestPackedEnumSmallRange proto; + for (int i = 0; i < kNumVals; i++) { + proto.add_vals(protobuf_unittest::TestPackedEnumSmallRange::FOO); + } + + protobuf_unittest::TestPackedEnumSmallRange new_proto; + new_proto.ParseFromString(proto.SerializeAsString()); + + // We should have reserved exactly the right size for new_proto's `vals`, + // rather than growing it on demand like we did in `proto`. + EXPECT_LT(new_proto.vals().Capacity(), proto.vals().Capacity()); + + // Check that new_proto's capacity is equal to exactly what we'd get from + // calling Reserve(n). + protobuf_unittest::TestPackedEnumSmallRange empty_proto; + empty_proto.mutable_vals()->Reserve(kNumVals); + EXPECT_EQ(new_proto.vals().Capacity(), empty_proto.vals().Capacity()); +} + +// Create a serialized proto which falsely claims to have a packed array of +// enums of length a little less than 2^31. We merge this with a proto that +// already has a few elements in this array. +// +// This test checks that the parser doesn't overflow an int32 when computing the +// array's new length. +TEST(GeneratedMessageTctableLiteTest, PackedEnumSmallRangeLargeSize) { +#ifdef ABSL_HAVE_MEMORY_SANITIZER + // This test attempts to allocate 8GB of memory, which OOMs MSAN. + return; +#endif + +#ifdef _WIN32 + // This test OOMs on Windows. I think this is because Windows is committing + // the entirety of the 8GB malloc'ed range, whereas Linux maps it but doesn't + // commit it. + return; +#endif + + // This test is only meaningful on 64-bit platforms. On 32-bit platforms, we + // can't allocate 2^31 4-byte elements anyway; that is a straightforward OOM. + if (sizeof(size_t) < 8) { + return; + } + + // Create a serialized proto that contains just `field 1: length ~2^31`. We + // don't put the actual data in there, just the field header. + uint8_t serialize_buffer[64]; + uint8_t* serialize_ptr = serialize_buffer; + serialize_ptr = WireFormatLite::WriteTagToArray( + 1, WireFormatLite::WIRETYPE_LENGTH_DELIMITED, serialize_ptr); + // INT32_MAX is not a valid array length because RepeatedField uses a bit of + // space for its metadata. We can be a little short of it, that's fine. + serialize_ptr = WireFormatLite::WriteUInt32NoTagToArray( + std::numeric_limits::max() - 64, serialize_ptr); + + absl::string_view serialized{ + reinterpret_cast(&serialize_buffer[0]), + static_cast(serialize_ptr - serialize_buffer)}; + + // This isn't a legal proto because the given array length (a little less than + // 2^31) doesn't match the actual array length (0). But all we're checking + // for here is that we don't have UB when deserializing. + protobuf_unittest::TestPackedEnumSmallRange proto; + // Add a few elements to the proto so that when we MergeFromString, the final + // array length is greater than INT32_MAX. + for (int i = 0; i < 128; i++) { + proto.add_vals(protobuf_unittest::TestPackedEnumSmallRange::FOO); + } + EXPECT_FALSE(proto.MergeFromString(serialized)); +} + } // namespace internal } // namespace protobuf } // namespace google diff --git a/src/google/protobuf/unittest.proto b/src/google/protobuf/unittest.proto index b7d4182c6e..4f73ea350b 100644 --- a/src/google/protobuf/unittest.proto +++ b/src/google/protobuf/unittest.proto @@ -1685,3 +1685,13 @@ message TestCord{ optional bytes optional_bytes_cord = 1 [ctype=CORD]; optional bytes optional_bytes_cord_default = 2 [ctype=CORD, default = "hello"]; } + +message TestPackedEnumSmallRange { + enum NestedEnum { + UNSPECIFIED = 0; + FOO = 1; + BAR = 2; + BAZ = 3; + } + repeated NestedEnum vals = 1 [packed = true]; +}