Improve number handling in JSON parsing

- Tighten up on Infinity/NaN handling in terms of whitespace handling (and test casing)
- Validate that values are genuinely integers when they've been parsed from a JSON number (ignoring the fact that 1.0000000000000000001 == 1 as a double...)
- Allow exponents and decimal points in string representations
pull/1087/head
Jon Skeet 9 years ago
parent d19c26f2c8
commit aa431a0481
  1. 47
      csharp/src/Google.Protobuf.Test/JsonParserTest.cs
  2. 95
      csharp/src/Google.Protobuf/JsonParser.cs

@ -39,9 +39,7 @@ using System;
namespace Google.Protobuf namespace Google.Protobuf
{ {
/// <summary> /// <summary>
/// Unit tests for JSON parsing. Some tests are ignored at the moment as the desired behaviour /// Unit tests for JSON parsing.
/// isn't fully known, either in terms of which exceptions should be thrown or whether they should
/// count as valid values.
/// </summary> /// </summary>
public class JsonParserTest public class JsonParserTest
{ {
@ -205,6 +203,8 @@ namespace Google.Protobuf
[Test] [Test]
[TestCase("+0")] [TestCase("+0")]
[TestCase(" 1")]
[TestCase("1 ")]
[TestCase("00")] [TestCase("00")]
[TestCase("-00")] [TestCase("-00")]
[TestCase("--1")] [TestCase("--1")]
@ -318,7 +318,18 @@ namespace Google.Protobuf
[TestCase("1.0.0")] [TestCase("1.0.0")]
[TestCase("+1")] [TestCase("+1")]
[TestCase("00")] [TestCase("00")]
[TestCase("01")]
[TestCase("-00")]
[TestCase("-01")]
[TestCase("--1")] [TestCase("--1")]
[TestCase(" Infinity")]
[TestCase(" -Infinity")]
[TestCase("NaN ")]
[TestCase("Infinity ")]
[TestCase("-Infinity ")]
[TestCase(" NaN")]
[TestCase("INFINITY")]
[TestCase("nan")]
[TestCase("\u00BD")] // 1/2 as a single Unicode character. Just sanity checking... [TestCase("\u00BD")] // 1/2 as a single Unicode character. Just sanity checking...
public void StringToDouble_Invalid(string jsonValue) public void StringToDouble_Invalid(string jsonValue)
{ {
@ -363,6 +374,10 @@ namespace Google.Protobuf
[TestCase("-1", -1)] [TestCase("-1", -1)]
[TestCase("2147483647", 2147483647)] [TestCase("2147483647", 2147483647)]
[TestCase("-2147483648", -2147483648)] [TestCase("-2147483648", -2147483648)]
[TestCase("1e1", 10)]
[TestCase("-1e1", -10)]
[TestCase("10.00", 10)]
[TestCase("-10.00", -10)]
public void NumberToInt32_Valid(string jsonValue, int expectedParsedValue) public void NumberToInt32_Valid(string jsonValue, int expectedParsedValue)
{ {
string json = "{ \"singleInt32\": " + jsonValue + "}"; string json = "{ \"singleInt32\": " + jsonValue + "}";
@ -376,7 +391,8 @@ namespace Google.Protobuf
[TestCase("-00", typeof(InvalidJsonException))] [TestCase("-00", typeof(InvalidJsonException))]
[TestCase("--1", typeof(InvalidJsonException))] [TestCase("--1", typeof(InvalidJsonException))]
[TestCase("+1", typeof(InvalidJsonException))] [TestCase("+1", typeof(InvalidJsonException))]
[TestCase("1.5", typeof(InvalidProtocolBufferException), Ignore = true, Reason = "Desired behaviour unclear")] [TestCase("1.5", typeof(InvalidProtocolBufferException))]
// Value is out of range
[TestCase("1e10", typeof(InvalidProtocolBufferException))] [TestCase("1e10", typeof(InvalidProtocolBufferException))]
[TestCase("2147483648", typeof(InvalidProtocolBufferException))] [TestCase("2147483648", typeof(InvalidProtocolBufferException))]
[TestCase("-2147483649", typeof(InvalidProtocolBufferException))] [TestCase("-2147483649", typeof(InvalidProtocolBufferException))]
@ -411,8 +427,10 @@ namespace Google.Protobuf
[TestCase("0", 0L)] [TestCase("0", 0L)]
[TestCase("1", 1L)] [TestCase("1", 1L)]
[TestCase("-1", -1L)] [TestCase("-1", -1L)]
[TestCase("9223372036854775807", 9223372036854775807, Ignore = true, Reason = "Desired behaviour unclear")] // long.MaxValue isn't actually representable as a double. This string value is the highest
[TestCase("-9223372036854775808", -9223372036854775808, Ignore = true, Reason = "Desired behaviour unclear")] // representable value which isn't greater than long.MaxValue.
[TestCase("9223372036854769664", 9223372036854769664)]
[TestCase("-9223372036854775808", -9223372036854775808)]
public void NumberToInt64_Valid(string jsonValue, long expectedParsedValue) public void NumberToInt64_Valid(string jsonValue, long expectedParsedValue)
{ {
string json = "{ \"singleInt64\": " + jsonValue + "}"; string json = "{ \"singleInt64\": " + jsonValue + "}";
@ -422,8 +440,11 @@ namespace Google.Protobuf
// Assume that anything non-bounds-related is covered in the Int32 case // Assume that anything non-bounds-related is covered in the Int32 case
[Test] [Test]
[TestCase("-9223372036854775809", Ignore = true, Reason = "Desired behaviour unclear")] [TestCase("9223372036854775808")]
[TestCase("9223372036854775808", Ignore = true, Reason = "Desired behaviour unclear")] // Theoretical bound would be -9223372036854775809, but when that is parsed to a double
// we end up with the exact value of long.MinValue due to lack of precision. The value here
// is the "next double down".
[TestCase("-9223372036854780000")]
public void NumberToInt64_Invalid(string jsonValue) public void NumberToInt64_Invalid(string jsonValue)
{ {
string json = "{ \"singleInt64\": " + jsonValue + "}"; string json = "{ \"singleInt64\": " + jsonValue + "}";
@ -433,7 +454,9 @@ namespace Google.Protobuf
[Test] [Test]
[TestCase("0", 0UL)] [TestCase("0", 0UL)]
[TestCase("1", 1UL)] [TestCase("1", 1UL)]
[TestCase("18446744073709551615", 18446744073709551615, Ignore = true, Reason = "Desired behaviour unclear")] // ulong.MaxValue isn't representable as a double. This value is the largest double within
// the range of ulong.
[TestCase("18446744073709500416", 18446744073709500416UL)]
public void NumberToUInt64_Valid(string jsonValue, ulong expectedParsedValue) public void NumberToUInt64_Valid(string jsonValue, ulong expectedParsedValue)
{ {
string json = "{ \"singleUint64\": " + jsonValue + "}"; string json = "{ \"singleUint64\": " + jsonValue + "}";
@ -475,9 +498,9 @@ namespace Google.Protobuf
} }
[Test] [Test]
[TestCase("1.7977e308", Ignore = true, Reason = "Desired behaviour unclear")] [TestCase("1.7977e308")]
[TestCase("-1.7977e308", Ignore = true, Reason = "Desired behaviour unclear")] [TestCase("-1.7977e308")]
[TestCase("1e309", Ignore = true, Reason = "Desired behaviour unclear")] [TestCase("1e309")]
[TestCase("1,0")] [TestCase("1,0")]
[TestCase("1.0.0")] [TestCase("1.0.0")]
[TestCase("+1")] [TestCase("+1")]

@ -540,17 +540,17 @@ namespace Google.Protobuf
case FieldType.Int32: case FieldType.Int32:
case FieldType.SInt32: case FieldType.SInt32:
case FieldType.SFixed32: case FieldType.SFixed32:
return ParseNumericString(keyText, int.Parse, false); return ParseNumericString(keyText, int.Parse);
case FieldType.UInt32: case FieldType.UInt32:
case FieldType.Fixed32: case FieldType.Fixed32:
return ParseNumericString(keyText, uint.Parse, false); return ParseNumericString(keyText, uint.Parse);
case FieldType.Int64: case FieldType.Int64:
case FieldType.SInt64: case FieldType.SInt64:
case FieldType.SFixed64: case FieldType.SFixed64:
return ParseNumericString(keyText, long.Parse, false); return ParseNumericString(keyText, long.Parse);
case FieldType.UInt64: case FieldType.UInt64:
case FieldType.Fixed64: case FieldType.Fixed64:
return ParseNumericString(keyText, ulong.Parse, false); return ParseNumericString(keyText, ulong.Parse);
default: default:
throw new InvalidProtocolBufferException("Invalid field type for map: " + field.FieldType); throw new InvalidProtocolBufferException("Invalid field type for map: " + field.FieldType);
} }
@ -561,7 +561,6 @@ namespace Google.Protobuf
double value = token.NumberValue; double value = token.NumberValue;
checked checked
{ {
// TODO: Validate that it's actually an integer, possibly in terms of the textual representation?
try try
{ {
switch (field.FieldType) switch (field.FieldType)
@ -569,16 +568,20 @@ namespace Google.Protobuf
case FieldType.Int32: case FieldType.Int32:
case FieldType.SInt32: case FieldType.SInt32:
case FieldType.SFixed32: case FieldType.SFixed32:
CheckInteger(value);
return (int) value; return (int) value;
case FieldType.UInt32: case FieldType.UInt32:
case FieldType.Fixed32: case FieldType.Fixed32:
CheckInteger(value);
return (uint) value; return (uint) value;
case FieldType.Int64: case FieldType.Int64:
case FieldType.SInt64: case FieldType.SInt64:
case FieldType.SFixed64: case FieldType.SFixed64:
CheckInteger(value);
return (long) value; return (long) value;
case FieldType.UInt64: case FieldType.UInt64:
case FieldType.Fixed64: case FieldType.Fixed64:
CheckInteger(value);
return (ulong) value; return (ulong) value;
case FieldType.Double: case FieldType.Double:
return value; return value;
@ -597,20 +600,32 @@ namespace Google.Protobuf
{ {
return float.NegativeInfinity; return float.NegativeInfinity;
} }
throw new InvalidProtocolBufferException("Value out of range: " + value); throw new InvalidProtocolBufferException($"Value out of range: {value}");
} }
return (float) value; return (float) value;
default: default:
throw new InvalidProtocolBufferException("Unsupported conversion from JSON number for field type " + field.FieldType); throw new InvalidProtocolBufferException($"Unsupported conversion from JSON number for field type {field.FieldType}");
} }
} }
catch (OverflowException) catch (OverflowException)
{ {
throw new InvalidProtocolBufferException("Value out of range: " + value); throw new InvalidProtocolBufferException($"Value out of range: {value}");
} }
} }
} }
private static void CheckInteger(double value)
{
if (double.IsInfinity(value) || double.IsNaN(value))
{
throw new InvalidProtocolBufferException($"Value not an integer: {value}");
}
if (value != Math.Floor(value))
{
throw new InvalidProtocolBufferException($"Value not an integer: {value}");
}
}
private static object ParseSingleStringValue(FieldDescriptor field, string text) private static object ParseSingleStringValue(FieldDescriptor field, string text)
{ {
switch (field.FieldType) switch (field.FieldType)
@ -622,43 +637,35 @@ namespace Google.Protobuf
case FieldType.Int32: case FieldType.Int32:
case FieldType.SInt32: case FieldType.SInt32:
case FieldType.SFixed32: case FieldType.SFixed32:
return ParseNumericString(text, int.Parse, false); return ParseNumericString(text, int.Parse);
case FieldType.UInt32: case FieldType.UInt32:
case FieldType.Fixed32: case FieldType.Fixed32:
return ParseNumericString(text, uint.Parse, false); return ParseNumericString(text, uint.Parse);
case FieldType.Int64: case FieldType.Int64:
case FieldType.SInt64: case FieldType.SInt64:
case FieldType.SFixed64: case FieldType.SFixed64:
return ParseNumericString(text, long.Parse, false); return ParseNumericString(text, long.Parse);
case FieldType.UInt64: case FieldType.UInt64:
case FieldType.Fixed64: case FieldType.Fixed64:
return ParseNumericString(text, ulong.Parse, false); return ParseNumericString(text, ulong.Parse);
case FieldType.Double: case FieldType.Double:
double d = ParseNumericString(text, double.Parse, true); double d = ParseNumericString(text, double.Parse);
// double.Parse can return +/- infinity on Mono for non-infinite values which are out of range for double. ValidateInfinityAndNan(text, double.IsPositiveInfinity(d), double.IsNegativeInfinity(d), double.IsNaN(d));
if (double.IsInfinity(d) && !text.Contains("Infinity"))
{
throw new InvalidProtocolBufferException("Invalid numeric value: " + text);
}
return d; return d;
case FieldType.Float: case FieldType.Float:
float f = ParseNumericString(text, float.Parse, true); float f = ParseNumericString(text, float.Parse);
// float.Parse can return +/- infinity on Mono for non-infinite values which are out of range for float. ValidateInfinityAndNan(text, float.IsPositiveInfinity(f), float.IsNegativeInfinity(f), float.IsNaN(f));
if (float.IsInfinity(f) && !text.Contains("Infinity"))
{
throw new InvalidProtocolBufferException("Invalid numeric value: " + text);
}
return f; return f;
case FieldType.Enum: case FieldType.Enum:
var enumValue = field.EnumType.FindValueByName(text); var enumValue = field.EnumType.FindValueByName(text);
if (enumValue == null) if (enumValue == null)
{ {
throw new InvalidProtocolBufferException("Invalid enum value: " + text + " for enum type: " + field.EnumType.FullName); throw new InvalidProtocolBufferException($"Invalid enum value: {text} for enum type: {field.EnumType.FullName}");
} }
// Just return it as an int, and let the CLR convert it. // Just return it as an int, and let the CLR convert it.
return enumValue.Number; return enumValue.Number;
default: default:
throw new InvalidProtocolBufferException("Unsupported conversion from JSON string for field type " + field.FieldType); throw new InvalidProtocolBufferException($"Unsupported conversion from JSON string for field type {field.FieldType}");
} }
} }
@ -670,43 +677,53 @@ namespace Google.Protobuf
return field.MessageType.Parser.CreateTemplate(); return field.MessageType.Parser.CreateTemplate();
} }
private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser, bool floatingPoint) private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser)
{ {
// TODO: Prohibit leading zeroes (but allow 0!)
// TODO: Validate handling of "Infinity" etc. (Should be case sensitive, no leading whitespace etc)
// Can't prohibit this with NumberStyles. // Can't prohibit this with NumberStyles.
if (text.StartsWith("+")) if (text.StartsWith("+"))
{ {
throw new InvalidProtocolBufferException("Invalid numeric value: " + text); throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
} }
if (text.StartsWith("0") && text.Length > 1) if (text.StartsWith("0") && text.Length > 1)
{ {
if (text[1] >= '0' && text[1] <= '9') if (text[1] >= '0' && text[1] <= '9')
{ {
throw new InvalidProtocolBufferException("Invalid numeric value: " + text); throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
} }
} }
else if (text.StartsWith("-0") && text.Length > 2) else if (text.StartsWith("-0") && text.Length > 2)
{ {
if (text[2] >= '0' && text[2] <= '9') if (text[2] >= '0' && text[2] <= '9')
{ {
throw new InvalidProtocolBufferException("Invalid numeric value: " + text); throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
} }
} }
try try
{ {
var styles = floatingPoint return parser(text, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture);
? NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent
: NumberStyles.AllowLeadingSign;
return parser(text, styles, CultureInfo.InvariantCulture);
} }
catch (FormatException) catch (FormatException)
{ {
throw new InvalidProtocolBufferException("Invalid numeric value for type: " + text); throw new InvalidProtocolBufferException($"Invalid numeric value for type: {text}");
} }
catch (OverflowException) catch (OverflowException)
{ {
throw new InvalidProtocolBufferException("Value out of range: " + text); throw new InvalidProtocolBufferException($"Value out of range: {text}");
}
}
/// <summary>
/// Checks that any infinite/NaN values originated from the correct text.
/// This corrects the lenient whitespace handling of double.Parse/float.Parse, as well as the
/// way that Mono parses out-of-range values as infinity.
/// </summary>
private static void ValidateInfinityAndNan(string text, bool isPositiveInfinity, bool isNegativeInfinity, bool isNaN)
{
if ((isPositiveInfinity && text != "Infinity") ||
(isNegativeInfinity && text != "-Infinity") ||
(isNaN && text != "NaN"))
{
throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
} }
} }
@ -719,7 +736,7 @@ namespace Google.Protobuf
var match = TimestampRegex.Match(token.StringValue); var match = TimestampRegex.Match(token.StringValue);
if (!match.Success) if (!match.Success)
{ {
throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue); throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
} }
var dateTime = match.Groups["datetime"].Value; var dateTime = match.Groups["datetime"].Value;
var subseconds = match.Groups["subseconds"].Value; var subseconds = match.Groups["subseconds"].Value;

Loading…
Cancel
Save