Merge pull request #10555 from jhump/jh/fix-consistency-with-very-large-decimal-numbers

protoc: fix consistency with parsing very large decimal numbers
2 years ago · c4644b77cb
parent 0d0164feff 7e745c4910
commit c4644b77cb
7 changed files with 155 additions and 48 deletions
--- a/src/google/protobuf/compiler/parser.cc
+++ b/src/google/protobuf/compiler/parser.cc
@ -288,6 +288,16 @@ bool Parser::ConsumeInteger64(uint64_t max_value, uint64_t* output,
  }
 }

+bool Parser::TryConsumeInteger64(uint64_t max_value, uint64_t* output) {
+  if (LookingAtType(io::Tokenizer::TYPE_INTEGER) &&
+      io::Tokenizer::ParseInteger(input_->current().text, max_value,
+                                  output)) {
+    input_->Next();
+    return true;
+  }
+  return false;
+}
+
 bool Parser::ConsumeNumber(double* output, const char* error) {
  if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
    *output = io::Tokenizer::ParseFloat(input_->current().text);
@ -296,13 +306,19 @@ bool Parser::ConsumeNumber(double* output, const char* error) {
  } else if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
    // Also accept integers.
    uint64_t value = 0;
-    if (!io::Tokenizer::ParseInteger(input_->current().text,
+    if (io::Tokenizer::ParseInteger(input_->current().text,
                                     std::numeric_limits<uint64_t>::max(),
                                     &value)) {
+      *output = value;
+    } else if (input_->current().text[0] == '0') {
+      // octal or hexadecimal; don't bother parsing as float
+      AddError("Integer out of range.");
+      // We still return true because we did, in fact, parse a number.
+    } else if (!io::Tokenizer::TryParseFloat(input_->current().text, output)) {
+      // out of int range, and not valid float? 🤷
      AddError("Integer out of range.");
      // We still return true because we did, in fact, parse a number.
    }
-    *output = value;
    input_->Next();
    return true;
  } else if (LookingAt("inf")) {
@ -1575,18 +1591,20 @@ bool Parser::ParseOption(Message* options,
            is_negative
                ? static_cast<uint64_t>(std::numeric_limits<int64_t>::max()) + 1
                : std::numeric_limits<uint64_t>::max();
-        DO(ConsumeInteger64(max_value, &value, "Expected integer."));
-        if (is_negative) {
-          value_location.AddPath(
-              UninterpretedOption::kNegativeIntValueFieldNumber);
-          uninterpreted_option->set_negative_int_value(
-              static_cast<int64_t>(0 - value));
-        } else {
-          value_location.AddPath(
-              UninterpretedOption::kPositiveIntValueFieldNumber);
-          uninterpreted_option->set_positive_int_value(value);
+        if (TryConsumeInteger64(max_value, &value)) {
+          if (is_negative) {
+            value_location.AddPath(
+                UninterpretedOption::kNegativeIntValueFieldNumber);
+            uninterpreted_option->set_negative_int_value(
+                static_cast<int64_t>(0 - value));
+          } else {
+            value_location.AddPath(
+                UninterpretedOption::kPositiveIntValueFieldNumber);
+            uninterpreted_option->set_positive_int_value(value);
+          }
+          break;
        }
-        break;
+        // value too large for an integer; fall through below to treat as floating point
      }

      case io::Tokenizer::TYPE_FLOAT: {
--- a/src/google/protobuf/compiler/parser.h
+++ b/src/google/protobuf/compiler/parser.h
@ -180,6 +180,9 @@ class PROTOBUF_EXPORT Parser {
  // is greater than max_value, an error will be reported.
  bool ConsumeInteger64(uint64_t max_value, uint64_t* output,
                        const char* error);
+  // Try to consume a 64-bit integer and store its value in "output".  No
+  // error is reported on failure, allowing caller to consume token another way.
+  bool TryConsumeInteger64(uint64_t max_value, uint64_t* output);
  // Consume a number and store its value in "output".  This will accept
  // tokens of either INTEGER or FLOAT type.
  bool ConsumeNumber(double* output, const char* error);
--- a/src/google/protobuf/compiler/parser_unittest.cc
+++ b/src/google/protobuf/compiler/parser_unittest.cc
@ -592,6 +592,56 @@ TEST_F(ParseMessageTest, FieldOptions) {
      "}");
 }

+TEST_F(ParseMessageTest, FieldOptionsSupportLargeDecimalLiteral) {
+  // decimal integer literal > uint64 max
+  ExpectParsesTo(
+      "import \"google/protobuf/descriptor.proto\";\n"
+      "extend google.protobuf.FieldOptions {\n"
+      "  optional double f = 10101;\n"
+      "}\n"
+      "message TestMessage {\n"
+      "  optional double a = 1 [default = 18446744073709551616];\n"
+      "  optional double b = 2 [default = -18446744073709551616];\n"
+      "  optional double c = 3 [(f) = 18446744073709551616];\n"
+      "  optional double d = 4 [(f) = -18446744073709551616];\n"
+      "}\n",
+
+      "dependency: \"google/protobuf/descriptor.proto\""
+      "extension {"
+      "  name: \"f\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 10101"
+      "  extendee: \"google.protobuf.FieldOptions\""
+      "}"
+      "message_type {"
+      "  name: \"TestMessage\""
+      "  field {"
+      "    name: \"a\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 1"
+      "    default_value: \"1.8446744073709552e+19\""
+      "  }"
+      "  field {"
+      "    name: \"b\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 2"
+      "    default_value: \"-1.8446744073709552e+19\""
+      "  }"
+      "  field {"
+      "    name: \"c\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 3"
+      "    options{"
+      "      uninterpreted_option{"
+      "        name{ name_part: \"f\" is_extension: true }"
+      "        double_value: 1.8446744073709552e+19"
+      "      }"
+      "    }"
+      "  }"
+      "  field {"
+      "    name: \"d\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 4"
+      "    options{"
+      "      uninterpreted_option{"
+      "        name{ name_part: \"f\" is_extension: true }"
+      "        double_value: -1.8446744073709552e+19"
+      "      }"
+      "    }"
+      "  }"
+      "}");
+}
+
 TEST_F(ParseMessageTest, Oneof) {
  ExpectParsesTo(
      "message TestMessage {\n"
@ -1891,6 +1941,22 @@ TEST_F(ParserValidationErrorTest, FieldDefaultValueError) {
      "2:32: Enum type \"Baz\" has no value named \"NO_SUCH_VALUE\".\n");
 }

+TEST_F(ParserValidationErrorTest, FieldDefaultIntegerOutOfRange) {
+  ExpectHasErrors(
+      "message Foo {\n"
+      "  optional double bar = 1 [default = 0x10000000000000000];\n"
+      "}\n",
+      "1:37: Integer out of range.\n");
+}
+
+TEST_F(ParserValidationErrorTest, FieldOptionOutOfRange) {
+  ExpectHasErrors(
+      "message Foo {\n"
+      "  optional double bar = 1 [foo = 0x10000000000000000];\n"
+      "}\n",
+      "1:33: Integer out of range.\n");
+}
+
 TEST_F(ParserValidationErrorTest, FileOptionNameError) {
  ExpectHasValidationErrors(
      "option foo = 5;",
--- a/src/google/protobuf/descriptor.cc
+++ b/src/google/protobuf/descriptor.cc
@ -58,6 +58,7 @@
 #include "absl/strings/ascii.h"
 #include "absl/strings/escaping.h"
 #include "absl/strings/str_cat.h"
+#include "absl/strings/str_format.h"
 #include "google/protobuf/stubs/stringprintf.h"
 #include "absl/strings/str_join.h"
 #include "absl/strings/str_split.h"
@ -7675,6 +7676,27 @@ bool DescriptorBuilder::OptionInterpreter::ExamineIfOptionIsSet(
  return true;
 }

+namespace {
+// Helpers for method below
+
+template <typename T> std::string ValueOutOfRange(
+    absl::string_view type_name, absl::string_view option_name) {
+  return absl::StrFormat(
+    "Value out of range, %d to %d, for %s option \"%s\".", \
+    std::numeric_limits<T>::min(), std::numeric_limits<T>::max(),
+    type_name, option_name);
+}
+
+template <typename T> std::string ValueMustBeInt(
+    absl::string_view type_name, absl::string_view option_name) {
+  return absl::StrFormat(
+    "Value must be integer, from %d to %d, for %s option \"%s\".", \
+    std::numeric_limits<T>::min(), std::numeric_limits<T>::max(),
+    type_name, option_name);
+}
+
+} // namespace
+
 bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
    const FieldDescriptor* option_field, UnknownFieldSet* unknown_fields) {
  // We switch on the CppType to validate.
@ -7683,8 +7705,7 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
      if (uninterpreted_option_->has_positive_int_value()) {
        if (uninterpreted_option_->positive_int_value() >
            static_cast<uint64_t>(std::numeric_limits<int32_t>::max())) {
-          return AddValueError("Value out of range for int32 option \"" +
-                               option_field->full_name() + "\".");
+          return AddValueError(ValueOutOfRange<int32_t>("int32", option_field->full_name()));
        } else {
          SetInt32(option_field->number(),
                   uninterpreted_option_->positive_int_value(),
@ -7693,16 +7714,14 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
      } else if (uninterpreted_option_->has_negative_int_value()) {
        if (uninterpreted_option_->negative_int_value() <
            static_cast<int64_t>(std::numeric_limits<int32_t>::min())) {
-          return AddValueError("Value out of range for int32 option \"" +
-                               option_field->full_name() + "\".");
+          return AddValueError(ValueOutOfRange<int32_t>("int32", option_field->full_name()));
        } else {
          SetInt32(option_field->number(),
                   uninterpreted_option_->negative_int_value(),
                   option_field->type(), unknown_fields);
        }
      } else {
-        return AddValueError("Value must be integer for int32 option \"" +
-                             option_field->full_name() + "\".");
+        return AddValueError(ValueMustBeInt<int32_t>("int32", option_field->full_name()));
      }
      break;

@ -7710,8 +7729,7 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
      if (uninterpreted_option_->has_positive_int_value()) {
        if (uninterpreted_option_->positive_int_value() >
            static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) {
-          return AddValueError("Value out of range for int64 option \"" +
-                               option_field->full_name() + "\".");
+          return AddValueError(ValueOutOfRange<int64_t>("int64", option_field->full_name()));
        } else {
          SetInt64(option_field->number(),
                   uninterpreted_option_->positive_int_value(),
@ -7722,8 +7740,7 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
                 uninterpreted_option_->negative_int_value(),
                 option_field->type(), unknown_fields);
      } else {
-        return AddValueError("Value must be integer for int64 option \"" +
-                             option_field->full_name() + "\".");
+        return AddValueError(ValueMustBeInt<int64_t>("int64", option_field->full_name()));
      }
      break;

@ -7731,18 +7748,14 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
      if (uninterpreted_option_->has_positive_int_value()) {
        if (uninterpreted_option_->positive_int_value() >
            std::numeric_limits<uint32_t>::max()) {
-          return AddValueError("Value out of range for uint32 option \"" +
-                               option_field->name() + "\".");
+          return AddValueError(ValueOutOfRange<uint32_t>("uint32", option_field->full_name()));
        } else {
          SetUInt32(option_field->number(),
                    uninterpreted_option_->positive_int_value(),
                    option_field->type(), unknown_fields);
        }
      } else {
-        return AddValueError(
-            "Value must be non-negative integer for uint32 "
-            "option \"" +
-            option_field->full_name() + "\".");
+        return AddValueError(ValueMustBeInt<uint32_t>("uint32", option_field->full_name()));
      }
      break;

@ -7752,10 +7765,7 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
                  uninterpreted_option_->positive_int_value(),
                  option_field->type(), unknown_fields);
      } else {
-        return AddValueError(
-            "Value must be non-negative integer for uint64 "
-            "option \"" +
-            option_field->full_name() + "\".");
+        return AddValueError(ValueMustBeInt<uint64_t>("uint64", option_field->full_name()));
      }
      break;

--- a/src/google/protobuf/descriptor_unittest.cc
+++ b/src/google/protobuf/descriptor_unittest.cc
@ -5557,7 +5557,7 @@ TEST_F(ValidationErrorTest, Int32OptionValueOutOfPositiveRange) {
      "                                 positive_int_value: 0x80000000 } "
      "}",

-      "foo.proto: foo.proto: OPTION_VALUE: Value out of range "
+      "foo.proto: foo.proto: OPTION_VALUE: Value out of range, -2147483648 to 2147483647, "
      "for int32 option \"foo\".\n");
 }

@ -5574,7 +5574,7 @@ TEST_F(ValidationErrorTest, Int32OptionValueOutOfNegativeRange) {
      "                                 negative_int_value: -0x80000001 } "
      "}",

-      "foo.proto: foo.proto: OPTION_VALUE: Value out of range "
+      "foo.proto: foo.proto: OPTION_VALUE: Value out of range, -2147483648 to 2147483647, "
      "for int32 option \"foo\".\n");
 }

@ -5590,7 +5590,7 @@ TEST_F(ValidationErrorTest, Int32OptionValueIsNotPositiveInt) {
      "                                        is_extension: true } "
      "                                 string_value: \"5\" } }",

-      "foo.proto: foo.proto: OPTION_VALUE: Value must be integer "
+      "foo.proto: foo.proto: OPTION_VALUE: Value must be integer, from -2147483648 to 2147483647, "
      "for int32 option \"foo\".\n");
 }

@ -5608,7 +5608,7 @@ TEST_F(ValidationErrorTest, Int64OptionValueOutOfRange) {
      "} "
      "}",

-      "foo.proto: foo.proto: OPTION_VALUE: Value out of range "
+      "foo.proto: foo.proto: OPTION_VALUE: Value out of range, -9223372036854775808 to 9223372036854775807, "
      "for int64 option \"foo\".\n");
 }

@ -5624,7 +5624,7 @@ TEST_F(ValidationErrorTest, Int64OptionValueIsNotPositiveInt) {
      "                                        is_extension: true } "
      "                                 identifier_value: \"5\" } }",

-      "foo.proto: foo.proto: OPTION_VALUE: Value must be integer "
+      "foo.proto: foo.proto: OPTION_VALUE: Value must be integer, from -9223372036854775808 to 9223372036854775807, "
      "for int64 option \"foo\".\n");
 }

@ -5640,7 +5640,7 @@ TEST_F(ValidationErrorTest, UInt32OptionValueOutOfRange) {
      "                                        is_extension: true } "
      "                                 positive_int_value: 0x100000000 } }",

-      "foo.proto: foo.proto: OPTION_VALUE: Value out of range "
+      "foo.proto: foo.proto: OPTION_VALUE: Value out of range, 0 to 4294967295, "
      "for uint32 option \"foo\".\n");
 }

@ -5656,7 +5656,7 @@ TEST_F(ValidationErrorTest, UInt32OptionValueIsNotPositiveInt) {
      "                                        is_extension: true } "
      "                                 double_value: -5.6 } }",

-      "foo.proto: foo.proto: OPTION_VALUE: Value must be non-negative integer "
+      "foo.proto: foo.proto: OPTION_VALUE: Value must be integer, from 0 to 4294967295, "
      "for uint32 option \"foo\".\n");
 }

@ -5672,7 +5672,7 @@ TEST_F(ValidationErrorTest, UInt64OptionValueIsNotPositiveInt) {
      "                                        is_extension: true } "
      "                                 negative_int_value: -5 } }",

-      "foo.proto: foo.proto: OPTION_VALUE: Value must be non-negative integer "
+      "foo.proto: foo.proto: OPTION_VALUE: Value must be integer, from 0 to 18446744073709551615, "
      "for uint64 option \"foo\".\n");
 }

--- a/src/google/protobuf/io/tokenizer.cc
+++ b/src/google/protobuf/io/tokenizer.cc
@ -1002,9 +1002,20 @@ bool Tokenizer::ParseInteger(const std::string& text, uint64_t max_value,
 }

 double Tokenizer::ParseFloat(const std::string& text) {
+  double result = 0;
+  if (!TryParseFloat(text, &result)) {
+    GOOGLE_LOG(DFATAL)
+        << " Tokenizer::ParseFloat() passed text that could not have been"
+           " tokenized as a float: "
+        << absl::CEscape(text);
+  }
+  return result;
+}
+
+bool Tokenizer::TryParseFloat(const std::string& text, double* result) {
  const char* start = text.c_str();
  char* end;
-  double result = NoLocaleStrtod(start, &end);
+  *result = NoLocaleStrtod(start, &end);

  // "1e" is not a valid float, but if the tokenizer reads it, it will
  // report an error but still return it as a valid token.  We need to
@ -1020,12 +1031,7 @@ double Tokenizer::ParseFloat(const std::string& text) {
    ++end;
  }

-  GOOGLE_LOG_IF(DFATAL,
-         static_cast<size_t>(end - start) != text.size() || *start == '-')
-      << " Tokenizer::ParseFloat() passed text that could not have been"
-         " tokenized as a float: "
-      << absl::CEscape(text);
-  return result;
+  return static_cast<size_t>(end - start) == text.size() && *start != '-';
 }

 // Helper to append a Unicode code point to a string as UTF8, without bringing
--- a/src/google/protobuf/io/tokenizer.h
+++ b/src/google/protobuf/io/tokenizer.h
@ -214,6 +214,10 @@ class PROTOBUF_EXPORT Tokenizer {
  // result is undefined (possibly an assert failure).
  static double ParseFloat(const std::string& text);

+  // Parses given text as if it were a TYPE_FLOAT token.  Returns false if the
+  // given text is not actually a valid float literal.
+  static bool TryParseFloat(const std::string& text, double* result);
+
  // Parses a TYPE_STRING token.  This never fails, so long as the text actually
  // comes from a TYPE_STRING token parsed by Tokenizer.  If it doesn't, the
  // result is undefined (possibly an assert failure).