Improve number handling in JSON parsing

- Tighten up on Infinity/NaN handling in terms of whitespace handling (and test casing) - Validate that values are genuinely integers when they've been parsed from a JSON number (ignoring the fact that 1.0000000000000000001 == 1 as a double...) - Allow exponents and decimal points in string representations
9 years ago · aa431a0481
parent d19c26f2c8
commit aa431a0481
2 changed files with 91 additions and 51 deletions
--- a/csharp/src/Google.Protobuf.Test/JsonParserTest.cs
+++ b/csharp/src/Google.Protobuf.Test/JsonParserTest.cs
@ -39,9 +39,7 @@ using System;
 namespace Google.Protobuf
 {
    /// <summary>
-    /// Unit tests for JSON parsing. Some tests are ignored at the moment as the desired behaviour
+    /// Unit tests for JSON parsing.
    /// isn't fully known, either in terms of which exceptions should be thrown or whether they should
    /// count as valid values.
    /// </summary>
    public class JsonParserTest
    {
@ -205,6 +203,8 @@ namespace Google.Protobuf
        [Test]
        [TestCase("+0")]
        [TestCase(" 1")]
        [TestCase("1 ")]
        [TestCase("00")]
        [TestCase("-00")]
        [TestCase("--1")]
@ -318,7 +318,18 @@ namespace Google.Protobuf
        [TestCase("1.0.0")]
        [TestCase("+1")]
        [TestCase("00")]
        [TestCase("01")]
        [TestCase("-00")]
        [TestCase("-01")]
        [TestCase("--1")]
        [TestCase(" Infinity")]
        [TestCase(" -Infinity")]
        [TestCase("NaN ")]
        [TestCase("Infinity ")]
        [TestCase("-Infinity ")]
        [TestCase(" NaN")]
        [TestCase("INFINITY")]
        [TestCase("nan")]
        [TestCase("\u00BD")] // 1/2 as a single Unicode character. Just sanity checking...
        public void StringToDouble_Invalid(string jsonValue)
        {
@ -363,6 +374,10 @@ namespace Google.Protobuf
        [TestCase("-1", -1)]
        [TestCase("2147483647", 2147483647)]
        [TestCase("-2147483648", -2147483648)]
        [TestCase("1e1", 10)]
        [TestCase("-1e1", -10)]
        [TestCase("10.00", 10)]
        [TestCase("-10.00", -10)]
        public void NumberToInt32_Valid(string jsonValue, int expectedParsedValue)
        {
            string json = "{ \"singleInt32\": " + jsonValue + "}";
@ -376,7 +391,8 @@ namespace Google.Protobuf
        [TestCase("-00", typeof(InvalidJsonException))]
        [TestCase("--1", typeof(InvalidJsonException))]
        [TestCase("+1", typeof(InvalidJsonException))]
-        [TestCase("1.5", typeof(InvalidProtocolBufferException), Ignore = true, Reason = "Desired behaviour unclear")]
+        [TestCase("1.5", typeof(InvalidProtocolBufferException))]
        // Value is out of range
        [TestCase("1e10", typeof(InvalidProtocolBufferException))]
        [TestCase("2147483648", typeof(InvalidProtocolBufferException))]
        [TestCase("-2147483649", typeof(InvalidProtocolBufferException))]
@ -411,8 +427,10 @@ namespace Google.Protobuf
        [TestCase("0", 0L)]
        [TestCase("1", 1L)]
        [TestCase("-1", -1L)]
-        [TestCase("9223372036854775807", 9223372036854775807, Ignore = true, Reason = "Desired behaviour unclear")]
+        // long.MaxValue isn't actually representable as a double. This string value is the highest
-        [TestCase("-9223372036854775808", -9223372036854775808, Ignore = true, Reason = "Desired behaviour unclear")]
+        // representable value which isn't greater than long.MaxValue.
        [TestCase("9223372036854769664", 9223372036854769664)]
        [TestCase("-9223372036854775808", -9223372036854775808)]
        public void NumberToInt64_Valid(string jsonValue, long expectedParsedValue)
        {
            string json = "{ \"singleInt64\": " + jsonValue + "}";
@ -422,8 +440,11 @@ namespace Google.Protobuf
        // Assume that anything non-bounds-related is covered in the Int32 case
        [Test]
-        [TestCase("-9223372036854775809", Ignore = true, Reason = "Desired behaviour unclear")]
+        [TestCase("9223372036854775808")]
-        [TestCase("9223372036854775808", Ignore = true, Reason = "Desired behaviour unclear")]
+        // Theoretical bound would be -9223372036854775809, but when that is parsed to a double
        // we end up with the exact value of long.MinValue due to lack of precision. The value here
        // is the "next double down".
        [TestCase("-9223372036854780000")]
        public void NumberToInt64_Invalid(string jsonValue)
        {
            string json = "{ \"singleInt64\": " + jsonValue + "}";
@ -433,7 +454,9 @@ namespace Google.Protobuf
        [Test]
        [TestCase("0", 0UL)]
        [TestCase("1", 1UL)]
-        [TestCase("18446744073709551615", 18446744073709551615, Ignore = true, Reason = "Desired behaviour unclear")]
+        // ulong.MaxValue isn't representable as a double. This value is the largest double within
        // the range of ulong.
        [TestCase("18446744073709500416", 18446744073709500416UL)]
        public void NumberToUInt64_Valid(string jsonValue, ulong expectedParsedValue)
        {
            string json = "{ \"singleUint64\": " + jsonValue + "}";
@ -475,9 +498,9 @@ namespace Google.Protobuf
        }
        [Test]
-        [TestCase("1.7977e308", Ignore = true, Reason = "Desired behaviour unclear")]
+        [TestCase("1.7977e308")]
-        [TestCase("-1.7977e308", Ignore = true, Reason = "Desired behaviour unclear")]
+        [TestCase("-1.7977e308")]
-        [TestCase("1e309", Ignore = true, Reason = "Desired behaviour unclear")]
+        [TestCase("1e309")]
        [TestCase("1,0")]
        [TestCase("1.0.0")]
        [TestCase("+1")]
--- a/csharp/src/Google.Protobuf/JsonParser.cs
+++ b/csharp/src/Google.Protobuf/JsonParser.cs
@ -540,17 +540,17 @@ namespace Google.Protobuf
                case FieldType.Int32:
                case FieldType.SInt32:
                case FieldType.SFixed32:
-                    return ParseNumericString(keyText, int.Parse, false);
+                    return ParseNumericString(keyText, int.Parse);
                case FieldType.UInt32:
                case FieldType.Fixed32:
-                    return ParseNumericString(keyText, uint.Parse, false);
+                    return ParseNumericString(keyText, uint.Parse);
                case FieldType.Int64:
                case FieldType.SInt64:
                case FieldType.SFixed64:
-                    return ParseNumericString(keyText, long.Parse, false);
+                    return ParseNumericString(keyText, long.Parse);
                case FieldType.UInt64:
                case FieldType.Fixed64:
-                    return ParseNumericString(keyText, ulong.Parse, false);
+                    return ParseNumericString(keyText, ulong.Parse);
                default:
                    throw new InvalidProtocolBufferException("Invalid field type for map: " + field.FieldType);
            }
@ -561,7 +561,6 @@ namespace Google.Protobuf
            double value = token.NumberValue;
            checked
            {
                // TODO: Validate that it's actually an integer, possibly in terms of the textual representation?                
                try
                {
                    switch (field.FieldType)
@ -569,16 +568,20 @@ namespace Google.Protobuf
                        case FieldType.Int32:
                        case FieldType.SInt32:
                        case FieldType.SFixed32:
                            CheckInteger(value);
                            return (int) value;
                        case FieldType.UInt32:
                        case FieldType.Fixed32:
                            CheckInteger(value);
                            return (uint) value;
                        case FieldType.Int64:
                        case FieldType.SInt64:
                        case FieldType.SFixed64:
                            CheckInteger(value);
                            return (long) value;
                        case FieldType.UInt64:
                        case FieldType.Fixed64:
                            CheckInteger(value);
                            return (ulong) value;
                        case FieldType.Double:
                            return value;
@ -597,20 +600,32 @@ namespace Google.Protobuf
                                {
                                    return float.NegativeInfinity;
                                }
-                                throw new InvalidProtocolBufferException("Value out of range: " + value);
+                                throw new InvalidProtocolBufferException($"Value out of range: {value}");
                            }
                            return (float) value;
                        default:
-                            throw new InvalidProtocolBufferException("Unsupported conversion from JSON number for field type " + field.FieldType);
+                            throw new InvalidProtocolBufferException($"Unsupported conversion from JSON number for field type {field.FieldType}");
                    }
                }
                catch (OverflowException)
                {
-                    throw new InvalidProtocolBufferException("Value out of range: " + value);
+                    throw new InvalidProtocolBufferException($"Value out of range: {value}");
                }
            }
        }
        private static void CheckInteger(double value)
        {
            if (double.IsInfinity(value) || double.IsNaN(value))
            {
                throw new InvalidProtocolBufferException($"Value not an integer: {value}");
            }
            if (value != Math.Floor(value))
            {
                throw new InvalidProtocolBufferException($"Value not an integer: {value}");
            }            
        }
        private static object ParseSingleStringValue(FieldDescriptor field, string text)
        {
            switch (field.FieldType)
@ -622,43 +637,35 @@ namespace Google.Protobuf
                case FieldType.Int32:
                case FieldType.SInt32:
                case FieldType.SFixed32:
-                    return ParseNumericString(text, int.Parse, false);
+                    return ParseNumericString(text, int.Parse);
                case FieldType.UInt32:
                case FieldType.Fixed32:
-                    return ParseNumericString(text, uint.Parse, false);
+                    return ParseNumericString(text, uint.Parse);
                case FieldType.Int64:
                case FieldType.SInt64:
                case FieldType.SFixed64:
-                    return ParseNumericString(text, long.Parse, false);
+                    return ParseNumericString(text, long.Parse);
                case FieldType.UInt64:
                case FieldType.Fixed64:
-                    return ParseNumericString(text, ulong.Parse, false);
+                    return ParseNumericString(text, ulong.Parse);
                case FieldType.Double:
-                    double d = ParseNumericString(text, double.Parse, true);
+                    double d = ParseNumericString(text, double.Parse);
-                    // double.Parse can return +/- infinity on Mono for non-infinite values which are out of range for double.
+                    ValidateInfinityAndNan(text, double.IsPositiveInfinity(d), double.IsNegativeInfinity(d), double.IsNaN(d));
                    if (double.IsInfinity(d) && !text.Contains("Infinity"))
                    {
                        throw new InvalidProtocolBufferException("Invalid numeric value: " + text);
                    }
                    return d;
                case FieldType.Float:
-                    float f = ParseNumericString(text, float.Parse, true);
+                    float f = ParseNumericString(text, float.Parse);
-                    // float.Parse can return +/- infinity on Mono for non-infinite values which are out of range for float.
+                    ValidateInfinityAndNan(text, float.IsPositiveInfinity(f), float.IsNegativeInfinity(f), float.IsNaN(f));
                    if (float.IsInfinity(f) && !text.Contains("Infinity"))
                    {
                        throw new InvalidProtocolBufferException("Invalid numeric value: " + text);
                    }
                    return f;
                case FieldType.Enum:
                    var enumValue = field.EnumType.FindValueByName(text);
                    if (enumValue == null)
                    {
-                        throw new InvalidProtocolBufferException("Invalid enum value: " + text + " for enum type: " + field.EnumType.FullName);
+                        throw new InvalidProtocolBufferException($"Invalid enum value: {text} for enum type: {field.EnumType.FullName}");
                    }
                    // Just return it as an int, and let the CLR convert it.
                    return enumValue.Number;
                default:
-                    throw new InvalidProtocolBufferException("Unsupported conversion from JSON string for field type " + field.FieldType);
+                    throw new InvalidProtocolBufferException($"Unsupported conversion from JSON string for field type {field.FieldType}");
            }
        }
@ -670,43 +677,53 @@ namespace Google.Protobuf
            return field.MessageType.Parser.CreateTemplate();
        }
-        private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser, bool floatingPoint)
+        private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser)
        {
            // TODO: Prohibit leading zeroes (but allow 0!)
            // TODO: Validate handling of "Infinity" etc. (Should be case sensitive, no leading whitespace etc)
            // Can't prohibit this with NumberStyles.
            if (text.StartsWith("+"))
            {
-                throw new InvalidProtocolBufferException("Invalid numeric value: " + text);
+                throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
            }
            if (text.StartsWith("0") && text.Length > 1)
            {
                if (text[1] >= '0' && text[1] <= '9')
                {
-                    throw new InvalidProtocolBufferException("Invalid numeric value: " + text);
+                    throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
                }
            }
            else if (text.StartsWith("-0") && text.Length > 2)
            {
                if (text[2] >= '0' && text[2] <= '9')
                {
-                    throw new InvalidProtocolBufferException("Invalid numeric value: " + text);
+                    throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
                }
            }
            try
            {
-                var styles = floatingPoint
+                return parser(text, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture);
                    ? NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent
                    : NumberStyles.AllowLeadingSign;
                return parser(text, styles, CultureInfo.InvariantCulture);
            }
            catch (FormatException)
            {
-                throw new InvalidProtocolBufferException("Invalid numeric value for type: " + text);
+                throw new InvalidProtocolBufferException($"Invalid numeric value for type: {text}");
            }
            catch (OverflowException)
            {
-                throw new InvalidProtocolBufferException("Value out of range: " + text);
+                throw new InvalidProtocolBufferException($"Value out of range: {text}");
            }
        }
        /// <summary>
        /// Checks that any infinite/NaN values originated from the correct text.
        /// This corrects the lenient whitespace handling of double.Parse/float.Parse, as well as the
        /// way that Mono parses out-of-range values as infinity.
        /// </summary>
        private static void ValidateInfinityAndNan(string text, bool isPositiveInfinity, bool isNegativeInfinity, bool isNaN)
        {
            if ((isPositiveInfinity && text != "Infinity") ||
                (isNegativeInfinity && text != "-Infinity") ||
                (isNaN && text != "NaN"))
            {
                throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
            }
        }
@ -719,7 +736,7 @@ namespace Google.Protobuf
            var match = TimestampRegex.Match(token.StringValue);
            if (!match.Success)
            {
-                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
+                throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
            }
            var dateTime = match.Groups["datetime"].Value;
            var subseconds = match.Groups["subseconds"].Value;