Avoid int32 overflow in PackedEnumSmallRange.

PiperOrigin-RevId: 534572557
pull/12871/head
Justin Lebar 2 years ago committed by Copybara-Service
parent 7cdd9f5113
commit c8c7ad39a0
  1. 3
      src/google/protobuf/BUILD.bazel
  2. 7
      src/google/protobuf/generated_message_tctable_lite.cc
  3. 79
      src/google/protobuf/generated_message_tctable_lite_test.cc
  4. 10
      src/google/protobuf/unittest.proto

@ -1085,6 +1085,7 @@ cc_test(
],
}),
deps = [
":cc_test_protos",
":protobuf_lite",
"@com_google_googletest//:gtest",
"@com_google_googletest//:gtest_main",
@ -1453,8 +1454,8 @@ cc_test(
name = "descriptor_visitor_test",
srcs = ["descriptor_visitor_test.cc"],
deps = [
":protobuf",
":cc_test_protos",
":protobuf",
"@com_google_absl//absl/strings",
"@com_google_googletest//:gtest",
"@com_google_googletest//:gtest_main",

@ -28,7 +28,9 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <algorithm>
#include <cstdint>
#include <limits>
#include <numeric>
#include <string>
#include <type_traits>
@ -1404,7 +1406,10 @@ const char* TcParser::PackedEnumSmallRange(PROTOBUF_TC_PARAM_DECL) {
// For enums that fit in one varint byte, optimistically assume that all
// the values are one byte long (i.e. no large unknown values). If so,
// we know exactly how many values we're going to get.
field->Reserve(field->size() + size_bytes);
int64_t new_size =
std::min(int64_t{field->size()} + size_bytes,
int64_t{std::numeric_limits<int32_t>::max()});
field->Reserve(static_cast<int32_t>(new_size));
});
}

@ -34,6 +34,7 @@
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "absl/types/optional.h"
#include "google/protobuf/unittest.pb.h"
#include "google/protobuf/wire_format_lite.h"
namespace google {
@ -820,6 +821,84 @@ TEST_F(FindFieldEntryTest, BigMessage) {
}
}
TEST(GeneratedMessageTctableLiteTest, PackedEnumSmallRange) {
// RepeatedField::Reserve(n) may set the capacity to something greater than n,
// and this "upgrade" algorithm can even be different depending on compiler
// optimizations! This value is chosen such that -- with the current
// implementation of Reserve() -- Reserve(kNumVals) always results in a
// different final capacity than you'd get by adding elements one at a time.
constexpr int kNumVals = 1023;
protobuf_unittest::TestPackedEnumSmallRange proto;
for (int i = 0; i < kNumVals; i++) {
proto.add_vals(protobuf_unittest::TestPackedEnumSmallRange::FOO);
}
protobuf_unittest::TestPackedEnumSmallRange new_proto;
new_proto.ParseFromString(proto.SerializeAsString());
// We should have reserved exactly the right size for new_proto's `vals`,
// rather than growing it on demand like we did in `proto`.
EXPECT_LT(new_proto.vals().Capacity(), proto.vals().Capacity());
// Check that new_proto's capacity is equal to exactly what we'd get from
// calling Reserve(n).
protobuf_unittest::TestPackedEnumSmallRange empty_proto;
empty_proto.mutable_vals()->Reserve(kNumVals);
EXPECT_EQ(new_proto.vals().Capacity(), empty_proto.vals().Capacity());
}
// Create a serialized proto which falsely claims to have a packed array of
// enums of length a little less than 2^31. We merge this with a proto that
// already has a few elements in this array.
//
// This test checks that the parser doesn't overflow an int32 when computing the
// array's new length.
TEST(GeneratedMessageTctableLiteTest, PackedEnumSmallRangeLargeSize) {
#ifdef ABSL_HAVE_MEMORY_SANITIZER
// This test attempts to allocate 8GB of memory, which OOMs MSAN.
return;
#endif
#ifdef _WIN32
// This test OOMs on Windows. I think this is because Windows is committing
// the entirety of the 8GB malloc'ed range, whereas Linux maps it but doesn't
// commit it.
return;
#endif
// This test is only meaningful on 64-bit platforms. On 32-bit platforms, we
// can't allocate 2^31 4-byte elements anyway; that is a straightforward OOM.
if (sizeof(size_t) < 8) {
return;
}
// Create a serialized proto that contains just `field 1: length ~2^31`. We
// don't put the actual data in there, just the field header.
uint8_t serialize_buffer[64];
uint8_t* serialize_ptr = serialize_buffer;
serialize_ptr = WireFormatLite::WriteTagToArray(
1, WireFormatLite::WIRETYPE_LENGTH_DELIMITED, serialize_ptr);
// INT32_MAX is not a valid array length because RepeatedField uses a bit of
// space for its metadata. We can be a little short of it, that's fine.
serialize_ptr = WireFormatLite::WriteUInt32NoTagToArray(
std::numeric_limits<int32_t>::max() - 64, serialize_ptr);
absl::string_view serialized{
reinterpret_cast<char*>(&serialize_buffer[0]),
static_cast<size_t>(serialize_ptr - serialize_buffer)};
// This isn't a legal proto because the given array length (a little less than
// 2^31) doesn't match the actual array length (0). But all we're checking
// for here is that we don't have UB when deserializing.
protobuf_unittest::TestPackedEnumSmallRange proto;
// Add a few elements to the proto so that when we MergeFromString, the final
// array length is greater than INT32_MAX.
for (int i = 0; i < 128; i++) {
proto.add_vals(protobuf_unittest::TestPackedEnumSmallRange::FOO);
}
EXPECT_FALSE(proto.MergeFromString(serialized));
}
} // namespace internal
} // namespace protobuf
} // namespace google

@ -1685,3 +1685,13 @@ message TestCord{
optional bytes optional_bytes_cord = 1 [ctype=CORD];
optional bytes optional_bytes_cord_default = 2 [ctype=CORD, default = "hello"];
}
message TestPackedEnumSmallRange {
enum NestedEnum {
UNSPECIFIED = 0;
FOO = 1;
BAR = 2;
BAZ = 3;
}
repeated NestedEnum vals = 1 [packed = true];
}

Loading…
Cancel
Save