Merge pull request #10555 from jhump/jh/fix-consistency-with-very-large-decimal-numbers

protoc: fix consistency with parsing very large decimal numbers
pull/10594/head
Matt Fowles Kulukundis 2 years ago committed by GitHub
commit c4644b77cb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 44
      src/google/protobuf/compiler/parser.cc
  2. 3
      src/google/protobuf/compiler/parser.h
  3. 66
      src/google/protobuf/compiler/parser_unittest.cc
  4. 50
      src/google/protobuf/descriptor.cc
  5. 16
      src/google/protobuf/descriptor_unittest.cc
  6. 20
      src/google/protobuf/io/tokenizer.cc
  7. 4
      src/google/protobuf/io/tokenizer.h

@ -288,6 +288,16 @@ bool Parser::ConsumeInteger64(uint64_t max_value, uint64_t* output,
} }
} }
bool Parser::TryConsumeInteger64(uint64_t max_value, uint64_t* output) {
if (LookingAtType(io::Tokenizer::TYPE_INTEGER) &&
io::Tokenizer::ParseInteger(input_->current().text, max_value,
output)) {
input_->Next();
return true;
}
return false;
}
bool Parser::ConsumeNumber(double* output, const char* error) { bool Parser::ConsumeNumber(double* output, const char* error) {
if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) { if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
*output = io::Tokenizer::ParseFloat(input_->current().text); *output = io::Tokenizer::ParseFloat(input_->current().text);
@ -296,13 +306,19 @@ bool Parser::ConsumeNumber(double* output, const char* error) {
} else if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) { } else if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
// Also accept integers. // Also accept integers.
uint64_t value = 0; uint64_t value = 0;
if (!io::Tokenizer::ParseInteger(input_->current().text, if (io::Tokenizer::ParseInteger(input_->current().text,
std::numeric_limits<uint64_t>::max(), std::numeric_limits<uint64_t>::max(),
&value)) { &value)) {
*output = value;
} else if (input_->current().text[0] == '0') {
// octal or hexadecimal; don't bother parsing as float
AddError("Integer out of range.");
// We still return true because we did, in fact, parse a number.
} else if (!io::Tokenizer::TryParseFloat(input_->current().text, output)) {
// out of int range, and not valid float? 🤷
AddError("Integer out of range."); AddError("Integer out of range.");
// We still return true because we did, in fact, parse a number. // We still return true because we did, in fact, parse a number.
} }
*output = value;
input_->Next(); input_->Next();
return true; return true;
} else if (LookingAt("inf")) { } else if (LookingAt("inf")) {
@ -1575,18 +1591,20 @@ bool Parser::ParseOption(Message* options,
is_negative is_negative
? static_cast<uint64_t>(std::numeric_limits<int64_t>::max()) + 1 ? static_cast<uint64_t>(std::numeric_limits<int64_t>::max()) + 1
: std::numeric_limits<uint64_t>::max(); : std::numeric_limits<uint64_t>::max();
DO(ConsumeInteger64(max_value, &value, "Expected integer.")); if (TryConsumeInteger64(max_value, &value)) {
if (is_negative) { if (is_negative) {
value_location.AddPath( value_location.AddPath(
UninterpretedOption::kNegativeIntValueFieldNumber); UninterpretedOption::kNegativeIntValueFieldNumber);
uninterpreted_option->set_negative_int_value( uninterpreted_option->set_negative_int_value(
static_cast<int64_t>(0 - value)); static_cast<int64_t>(0 - value));
} else { } else {
value_location.AddPath( value_location.AddPath(
UninterpretedOption::kPositiveIntValueFieldNumber); UninterpretedOption::kPositiveIntValueFieldNumber);
uninterpreted_option->set_positive_int_value(value); uninterpreted_option->set_positive_int_value(value);
}
break;
} }
break; // value too large for an integer; fall through below to treat as floating point
} }
case io::Tokenizer::TYPE_FLOAT: { case io::Tokenizer::TYPE_FLOAT: {

@ -180,6 +180,9 @@ class PROTOBUF_EXPORT Parser {
// is greater than max_value, an error will be reported. // is greater than max_value, an error will be reported.
bool ConsumeInteger64(uint64_t max_value, uint64_t* output, bool ConsumeInteger64(uint64_t max_value, uint64_t* output,
const char* error); const char* error);
// Try to consume a 64-bit integer and store its value in "output". No
// error is reported on failure, allowing caller to consume token another way.
bool TryConsumeInteger64(uint64_t max_value, uint64_t* output);
// Consume a number and store its value in "output". This will accept // Consume a number and store its value in "output". This will accept
// tokens of either INTEGER or FLOAT type. // tokens of either INTEGER or FLOAT type.
bool ConsumeNumber(double* output, const char* error); bool ConsumeNumber(double* output, const char* error);

@ -592,6 +592,56 @@ TEST_F(ParseMessageTest, FieldOptions) {
"}"); "}");
} }
TEST_F(ParseMessageTest, FieldOptionsSupportLargeDecimalLiteral) {
// decimal integer literal > uint64 max
ExpectParsesTo(
"import \"google/protobuf/descriptor.proto\";\n"
"extend google.protobuf.FieldOptions {\n"
" optional double f = 10101;\n"
"}\n"
"message TestMessage {\n"
" optional double a = 1 [default = 18446744073709551616];\n"
" optional double b = 2 [default = -18446744073709551616];\n"
" optional double c = 3 [(f) = 18446744073709551616];\n"
" optional double d = 4 [(f) = -18446744073709551616];\n"
"}\n",
"dependency: \"google/protobuf/descriptor.proto\""
"extension {"
" name: \"f\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 10101"
" extendee: \"google.protobuf.FieldOptions\""
"}"
"message_type {"
" name: \"TestMessage\""
" field {"
" name: \"a\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 1"
" default_value: \"1.8446744073709552e+19\""
" }"
" field {"
" name: \"b\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 2"
" default_value: \"-1.8446744073709552e+19\""
" }"
" field {"
" name: \"c\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 3"
" options{"
" uninterpreted_option{"
" name{ name_part: \"f\" is_extension: true }"
" double_value: 1.8446744073709552e+19"
" }"
" }"
" }"
" field {"
" name: \"d\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 4"
" options{"
" uninterpreted_option{"
" name{ name_part: \"f\" is_extension: true }"
" double_value: -1.8446744073709552e+19"
" }"
" }"
" }"
"}");
}
TEST_F(ParseMessageTest, Oneof) { TEST_F(ParseMessageTest, Oneof) {
ExpectParsesTo( ExpectParsesTo(
"message TestMessage {\n" "message TestMessage {\n"
@ -1891,6 +1941,22 @@ TEST_F(ParserValidationErrorTest, FieldDefaultValueError) {
"2:32: Enum type \"Baz\" has no value named \"NO_SUCH_VALUE\".\n"); "2:32: Enum type \"Baz\" has no value named \"NO_SUCH_VALUE\".\n");
} }
TEST_F(ParserValidationErrorTest, FieldDefaultIntegerOutOfRange) {
ExpectHasErrors(
"message Foo {\n"
" optional double bar = 1 [default = 0x10000000000000000];\n"
"}\n",
"1:37: Integer out of range.\n");
}
TEST_F(ParserValidationErrorTest, FieldOptionOutOfRange) {
ExpectHasErrors(
"message Foo {\n"
" optional double bar = 1 [foo = 0x10000000000000000];\n"
"}\n",
"1:33: Integer out of range.\n");
}
TEST_F(ParserValidationErrorTest, FileOptionNameError) { TEST_F(ParserValidationErrorTest, FileOptionNameError) {
ExpectHasValidationErrors( ExpectHasValidationErrors(
"option foo = 5;", "option foo = 5;",

@ -58,6 +58,7 @@
#include "absl/strings/ascii.h" #include "absl/strings/ascii.h"
#include "absl/strings/escaping.h" #include "absl/strings/escaping.h"
#include "absl/strings/str_cat.h" #include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "google/protobuf/stubs/stringprintf.h" #include "google/protobuf/stubs/stringprintf.h"
#include "absl/strings/str_join.h" #include "absl/strings/str_join.h"
#include "absl/strings/str_split.h" #include "absl/strings/str_split.h"
@ -7675,6 +7676,27 @@ bool DescriptorBuilder::OptionInterpreter::ExamineIfOptionIsSet(
return true; return true;
} }
namespace {
// Helpers for method below
template <typename T> std::string ValueOutOfRange(
absl::string_view type_name, absl::string_view option_name) {
return absl::StrFormat(
"Value out of range, %d to %d, for %s option \"%s\".", \
std::numeric_limits<T>::min(), std::numeric_limits<T>::max(),
type_name, option_name);
}
template <typename T> std::string ValueMustBeInt(
absl::string_view type_name, absl::string_view option_name) {
return absl::StrFormat(
"Value must be integer, from %d to %d, for %s option \"%s\".", \
std::numeric_limits<T>::min(), std::numeric_limits<T>::max(),
type_name, option_name);
}
} // namespace
bool DescriptorBuilder::OptionInterpreter::SetOptionValue( bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
const FieldDescriptor* option_field, UnknownFieldSet* unknown_fields) { const FieldDescriptor* option_field, UnknownFieldSet* unknown_fields) {
// We switch on the CppType to validate. // We switch on the CppType to validate.
@ -7683,8 +7705,7 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
if (uninterpreted_option_->has_positive_int_value()) { if (uninterpreted_option_->has_positive_int_value()) {
if (uninterpreted_option_->positive_int_value() > if (uninterpreted_option_->positive_int_value() >
static_cast<uint64_t>(std::numeric_limits<int32_t>::max())) { static_cast<uint64_t>(std::numeric_limits<int32_t>::max())) {
return AddValueError("Value out of range for int32 option \"" + return AddValueError(ValueOutOfRange<int32_t>("int32", option_field->full_name()));
option_field->full_name() + "\".");
} else { } else {
SetInt32(option_field->number(), SetInt32(option_field->number(),
uninterpreted_option_->positive_int_value(), uninterpreted_option_->positive_int_value(),
@ -7693,16 +7714,14 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
} else if (uninterpreted_option_->has_negative_int_value()) { } else if (uninterpreted_option_->has_negative_int_value()) {
if (uninterpreted_option_->negative_int_value() < if (uninterpreted_option_->negative_int_value() <
static_cast<int64_t>(std::numeric_limits<int32_t>::min())) { static_cast<int64_t>(std::numeric_limits<int32_t>::min())) {
return AddValueError("Value out of range for int32 option \"" + return AddValueError(ValueOutOfRange<int32_t>("int32", option_field->full_name()));
option_field->full_name() + "\".");
} else { } else {
SetInt32(option_field->number(), SetInt32(option_field->number(),
uninterpreted_option_->negative_int_value(), uninterpreted_option_->negative_int_value(),
option_field->type(), unknown_fields); option_field->type(), unknown_fields);
} }
} else { } else {
return AddValueError("Value must be integer for int32 option \"" + return AddValueError(ValueMustBeInt<int32_t>("int32", option_field->full_name()));
option_field->full_name() + "\".");
} }
break; break;
@ -7710,8 +7729,7 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
if (uninterpreted_option_->has_positive_int_value()) { if (uninterpreted_option_->has_positive_int_value()) {
if (uninterpreted_option_->positive_int_value() > if (uninterpreted_option_->positive_int_value() >
static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) { static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) {
return AddValueError("Value out of range for int64 option \"" + return AddValueError(ValueOutOfRange<int64_t>("int64", option_field->full_name()));
option_field->full_name() + "\".");
} else { } else {
SetInt64(option_field->number(), SetInt64(option_field->number(),
uninterpreted_option_->positive_int_value(), uninterpreted_option_->positive_int_value(),
@ -7722,8 +7740,7 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
uninterpreted_option_->negative_int_value(), uninterpreted_option_->negative_int_value(),
option_field->type(), unknown_fields); option_field->type(), unknown_fields);
} else { } else {
return AddValueError("Value must be integer for int64 option \"" + return AddValueError(ValueMustBeInt<int64_t>("int64", option_field->full_name()));
option_field->full_name() + "\".");
} }
break; break;
@ -7731,18 +7748,14 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
if (uninterpreted_option_->has_positive_int_value()) { if (uninterpreted_option_->has_positive_int_value()) {
if (uninterpreted_option_->positive_int_value() > if (uninterpreted_option_->positive_int_value() >
std::numeric_limits<uint32_t>::max()) { std::numeric_limits<uint32_t>::max()) {
return AddValueError("Value out of range for uint32 option \"" + return AddValueError(ValueOutOfRange<uint32_t>("uint32", option_field->full_name()));
option_field->name() + "\".");
} else { } else {
SetUInt32(option_field->number(), SetUInt32(option_field->number(),
uninterpreted_option_->positive_int_value(), uninterpreted_option_->positive_int_value(),
option_field->type(), unknown_fields); option_field->type(), unknown_fields);
} }
} else { } else {
return AddValueError( return AddValueError(ValueMustBeInt<uint32_t>("uint32", option_field->full_name()));
"Value must be non-negative integer for uint32 "
"option \"" +
option_field->full_name() + "\".");
} }
break; break;
@ -7752,10 +7765,7 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
uninterpreted_option_->positive_int_value(), uninterpreted_option_->positive_int_value(),
option_field->type(), unknown_fields); option_field->type(), unknown_fields);
} else { } else {
return AddValueError( return AddValueError(ValueMustBeInt<uint64_t>("uint64", option_field->full_name()));
"Value must be non-negative integer for uint64 "
"option \"" +
option_field->full_name() + "\".");
} }
break; break;

@ -5557,7 +5557,7 @@ TEST_F(ValidationErrorTest, Int32OptionValueOutOfPositiveRange) {
" positive_int_value: 0x80000000 } " " positive_int_value: 0x80000000 } "
"}", "}",
"foo.proto: foo.proto: OPTION_VALUE: Value out of range " "foo.proto: foo.proto: OPTION_VALUE: Value out of range, -2147483648 to 2147483647, "
"for int32 option \"foo\".\n"); "for int32 option \"foo\".\n");
} }
@ -5574,7 +5574,7 @@ TEST_F(ValidationErrorTest, Int32OptionValueOutOfNegativeRange) {
" negative_int_value: -0x80000001 } " " negative_int_value: -0x80000001 } "
"}", "}",
"foo.proto: foo.proto: OPTION_VALUE: Value out of range " "foo.proto: foo.proto: OPTION_VALUE: Value out of range, -2147483648 to 2147483647, "
"for int32 option \"foo\".\n"); "for int32 option \"foo\".\n");
} }
@ -5590,7 +5590,7 @@ TEST_F(ValidationErrorTest, Int32OptionValueIsNotPositiveInt) {
" is_extension: true } " " is_extension: true } "
" string_value: \"5\" } }", " string_value: \"5\" } }",
"foo.proto: foo.proto: OPTION_VALUE: Value must be integer " "foo.proto: foo.proto: OPTION_VALUE: Value must be integer, from -2147483648 to 2147483647, "
"for int32 option \"foo\".\n"); "for int32 option \"foo\".\n");
} }
@ -5608,7 +5608,7 @@ TEST_F(ValidationErrorTest, Int64OptionValueOutOfRange) {
"} " "} "
"}", "}",
"foo.proto: foo.proto: OPTION_VALUE: Value out of range " "foo.proto: foo.proto: OPTION_VALUE: Value out of range, -9223372036854775808 to 9223372036854775807, "
"for int64 option \"foo\".\n"); "for int64 option \"foo\".\n");
} }
@ -5624,7 +5624,7 @@ TEST_F(ValidationErrorTest, Int64OptionValueIsNotPositiveInt) {
" is_extension: true } " " is_extension: true } "
" identifier_value: \"5\" } }", " identifier_value: \"5\" } }",
"foo.proto: foo.proto: OPTION_VALUE: Value must be integer " "foo.proto: foo.proto: OPTION_VALUE: Value must be integer, from -9223372036854775808 to 9223372036854775807, "
"for int64 option \"foo\".\n"); "for int64 option \"foo\".\n");
} }
@ -5640,7 +5640,7 @@ TEST_F(ValidationErrorTest, UInt32OptionValueOutOfRange) {
" is_extension: true } " " is_extension: true } "
" positive_int_value: 0x100000000 } }", " positive_int_value: 0x100000000 } }",
"foo.proto: foo.proto: OPTION_VALUE: Value out of range " "foo.proto: foo.proto: OPTION_VALUE: Value out of range, 0 to 4294967295, "
"for uint32 option \"foo\".\n"); "for uint32 option \"foo\".\n");
} }
@ -5656,7 +5656,7 @@ TEST_F(ValidationErrorTest, UInt32OptionValueIsNotPositiveInt) {
" is_extension: true } " " is_extension: true } "
" double_value: -5.6 } }", " double_value: -5.6 } }",
"foo.proto: foo.proto: OPTION_VALUE: Value must be non-negative integer " "foo.proto: foo.proto: OPTION_VALUE: Value must be integer, from 0 to 4294967295, "
"for uint32 option \"foo\".\n"); "for uint32 option \"foo\".\n");
} }
@ -5672,7 +5672,7 @@ TEST_F(ValidationErrorTest, UInt64OptionValueIsNotPositiveInt) {
" is_extension: true } " " is_extension: true } "
" negative_int_value: -5 } }", " negative_int_value: -5 } }",
"foo.proto: foo.proto: OPTION_VALUE: Value must be non-negative integer " "foo.proto: foo.proto: OPTION_VALUE: Value must be integer, from 0 to 18446744073709551615, "
"for uint64 option \"foo\".\n"); "for uint64 option \"foo\".\n");
} }

@ -1002,9 +1002,20 @@ bool Tokenizer::ParseInteger(const std::string& text, uint64_t max_value,
} }
double Tokenizer::ParseFloat(const std::string& text) { double Tokenizer::ParseFloat(const std::string& text) {
double result = 0;
if (!TryParseFloat(text, &result)) {
GOOGLE_LOG(DFATAL)
<< " Tokenizer::ParseFloat() passed text that could not have been"
" tokenized as a float: "
<< absl::CEscape(text);
}
return result;
}
bool Tokenizer::TryParseFloat(const std::string& text, double* result) {
const char* start = text.c_str(); const char* start = text.c_str();
char* end; char* end;
double result = NoLocaleStrtod(start, &end); *result = NoLocaleStrtod(start, &end);
// "1e" is not a valid float, but if the tokenizer reads it, it will // "1e" is not a valid float, but if the tokenizer reads it, it will
// report an error but still return it as a valid token. We need to // report an error but still return it as a valid token. We need to
@ -1020,12 +1031,7 @@ double Tokenizer::ParseFloat(const std::string& text) {
++end; ++end;
} }
GOOGLE_LOG_IF(DFATAL, return static_cast<size_t>(end - start) == text.size() && *start != '-';
static_cast<size_t>(end - start) != text.size() || *start == '-')
<< " Tokenizer::ParseFloat() passed text that could not have been"
" tokenized as a float: "
<< absl::CEscape(text);
return result;
} }
// Helper to append a Unicode code point to a string as UTF8, without bringing // Helper to append a Unicode code point to a string as UTF8, without bringing

@ -214,6 +214,10 @@ class PROTOBUF_EXPORT Tokenizer {
// result is undefined (possibly an assert failure). // result is undefined (possibly an assert failure).
static double ParseFloat(const std::string& text); static double ParseFloat(const std::string& text);
// Parses given text as if it were a TYPE_FLOAT token. Returns false if the
// given text is not actually a valid float literal.
static bool TryParseFloat(const std::string& text, double* result);
// Parses a TYPE_STRING token. This never fails, so long as the text actually // Parses a TYPE_STRING token. This never fails, so long as the text actually
// comes from a TYPE_STRING token parsed by Tokenizer. If it doesn't, the // comes from a TYPE_STRING token parsed by Tokenizer. If it doesn't, the
// result is undefined (possibly an assert failure). // result is undefined (possibly an assert failure).

Loading…
Cancel
Save