diff --git a/Makefile.am b/Makefile.am
index 6223301279..7db38f0504 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -82,6 +82,8 @@ csharp_EXTRA_DIST= \
   csharp/src/Google.Protobuf.Test/Google.Protobuf.Test.csproj \
   csharp/src/Google.Protobuf.Test/IssuesTest.cs \
   csharp/src/Google.Protobuf.Test/JsonFormatterTest.cs \
+  csharp/src/Google.Protobuf.Test/JsonParserTest.cs \
+  csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs \
   csharp/src/Google.Protobuf.Test/Properties/AppManifest.xml \
   csharp/src/Google.Protobuf.Test/Properties/AssemblyInfo.cs \
   csharp/src/Google.Protobuf.Test/Reflection/DescriptorsTest.cs \
@@ -119,6 +121,9 @@ csharp_EXTRA_DIST= \
   csharp/src/Google.Protobuf/IMessage.cs \
   csharp/src/Google.Protobuf/InvalidProtocolBufferException.cs \
   csharp/src/Google.Protobuf/JsonFormatter.cs \
+  csharp/src/Google.Protobuf/JsonParser.cs \
+  csharp/src/Google.Protobuf/JsonToken.cs \
+  csharp/src/Google.Protobuf/JsonTokenizer.cs \
   csharp/src/Google.Protobuf/LimitedInputStream.cs \
   csharp/src/Google.Protobuf/MessageExtensions.cs \
   csharp/src/Google.Protobuf/MessageParser.cs \
diff --git a/csharp/src/Google.Protobuf.Test/Google.Protobuf.Test.csproj b/csharp/src/Google.Protobuf.Test/Google.Protobuf.Test.csproj
index 33be5daeae..cd15b968ba 100644
--- a/csharp/src/Google.Protobuf.Test/Google.Protobuf.Test.csproj
+++ b/csharp/src/Google.Protobuf.Test/Google.Protobuf.Test.csproj
@@ -95,6 +95,8 @@
+    <Compile Include="JsonParserTest.cs" />
+    <Compile Include="JsonTokenizerTest.cs" />
diff --git a/csharp/src/Google.Protobuf.Test/JsonFormatterTest.cs b/csharp/src/Google.Protobuf.Test/JsonFormatterTest.cs
index 3e7a3d2e11..08cedad8fe 100644
--- a/csharp/src/Google.Protobuf.Test/JsonFormatterTest.cs
+++ b/csharp/src/Google.Protobuf.Test/JsonFormatterTest.cs
@@ -275,6 +275,13 @@ namespace Google.Protobuf
             AssertJson(expectedJson, JsonFormatter.Default.Format(message));
         }
 
+        [Test]
+        public void WrapperFormatting_Message()
+        {
+            Assert.AreEqual("\"\"", JsonFormatter.Default.Format(new StringValue()));
+            Assert.AreEqual("0", JsonFormatter.Default.Format(new Int32Value()));
+        }
+
         [Test]
         public void WrapperFormatting_IncludeNull()
         {
@@ -376,12 +383,12 @@
             {
                 Fields =
                 {
-                    { "a", new Value { NullValue = new NullValue() } },
-                    { "b", new Value { BoolValue = false } },
-                    { "c", new Value { NumberValue = 10.5 } },
-                    { "d", new Value { StringValue = "text" } },
-                    { "e", new Value { ListValue = new ListValue { Values = { new Value { StringValue = "t1" }, new Value { NumberValue = 5 } } } } },
-                    { "f", new Value { StructValue = new Struct { Fields = { { "nested", new Value { StringValue = "value" } } } } } }
+                    { "a", Value.ForNull() },
+                    { "b", Value.ForBool(false) },
+                    { "c", Value.ForNumber(10.5) },
+                    { "d", Value.ForString("text") },
+                    { "e", Value.ForList(Value.ForString("t1"), Value.ForNumber(5)) },
+                    { "f", Value.ForStruct(new Struct { Fields = { { "nested", Value.ForString("value") } } }) }
                 }
             };
             AssertJson("{ 'a': null, 'b': false, 'c': 10.5, 'd': 'text', 'e': [ 't1', 5 ], 'f': { 'nested': 'value' } }", message.ToString());
@@ -405,6 +412,14 @@
             AssertJson("{ 'fieldMaskField': 'user.displayName,photo' }", JsonFormatter.Default.Format(message));
         }
 
+        // SourceContext is an example of a well-known type with no special JSON handling
+        [Test]
+        public void SourceContextStandalone()
+        {
+            var message = new SourceContext { FileName = "foo.proto" };
+            AssertJson("{ 'fileName': 'foo.proto' }", JsonFormatter.Default.Format(message));
+        }
+
         /// <summary>
         /// Checks that the actual JSON is the same as the expected JSON - but after replacing
         /// all apostrophes in the
expected JSON with double quotes. This basically makes the tests easier diff --git a/csharp/src/Google.Protobuf.Test/JsonParserTest.cs b/csharp/src/Google.Protobuf.Test/JsonParserTest.cs new file mode 100644 index 0000000000..b1c7b46cb2 --- /dev/null +++ b/csharp/src/Google.Protobuf.Test/JsonParserTest.cs @@ -0,0 +1,727 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#endregion + +using Google.Protobuf.TestProtos; +using Google.Protobuf.WellKnownTypes; +using NUnit.Framework; +using System; + +namespace Google.Protobuf +{ + /// + /// Unit tests for JSON parsing. Some tests are ignored at the moment as the desired behaviour + /// isn't fully known, either in terms of which exceptions should be thrown or whether they should + /// count as valid values. + /// + public class JsonParserTest + { + // Sanity smoke test + [Test] + public void AllTypesRoundtrip() + { + AssertRoundtrip(SampleMessages.CreateFullTestAllTypes()); + } + + [Test] + public void Maps() + { + AssertRoundtrip(new TestMap { MapStringString = { { "with spaces", "bar" }, { "a", "b" } } }); + AssertRoundtrip(new TestMap { MapInt32Int32 = { { 0, 1 }, { 2, 3 } } }); + AssertRoundtrip(new TestMap { MapBoolBool = { { false, true }, { true, false } } }); + } + + [Test] + [TestCase(" 1 ")] + [TestCase("+1")] + [TestCase("1,000")] + [TestCase("1.5")] + public void IntegerMapKeysAreStrict(string keyText) + { + // Test that integer parsing is strict. We assume that if this is correct for int32, + // it's correct for other numeric key types. 
+ var json = "{ \"mapInt32Int32\": { \"" + keyText + "\" : \"1\" } }"; + Assert.Throws(() => JsonParser.Default.Parse(json)); + } + + [Test] + public void SourceContextRoundtrip() + { + AssertRoundtrip(new SourceContext { FileName = "foo.proto" }); + } + + [Test] + public void SingularWrappers_DefaultNonNullValues() + { + var message = new TestWellKnownTypes + { + StringField = "", + BytesField = ByteString.Empty, + BoolField = false, + FloatField = 0f, + DoubleField = 0d, + Int32Field = 0, + Int64Field = 0, + Uint32Field = 0, + Uint64Field = 0 + }; + AssertRoundtrip(message); + } + + [Test] + public void SingularWrappers_NonDefaultValues() + { + var message = new TestWellKnownTypes + { + StringField = "x", + BytesField = ByteString.CopyFrom(1, 2, 3), + BoolField = true, + FloatField = 12.5f, + DoubleField = 12.25d, + Int32Field = 1, + Int64Field = 2, + Uint32Field = 3, + Uint64Field = 4 + }; + AssertRoundtrip(message); + } + + [Test] + public void SingularWrappers_ExplicitNulls() + { + var message = new TestWellKnownTypes(); + var json = new JsonFormatter(new JsonFormatter.Settings(true)).Format(message); + var parsed = JsonParser.Default.Parse(json); + Assert.AreEqual(message, parsed); + } + + [Test] + [TestCase(typeof(Int32Value), "32", 32)] + [TestCase(typeof(Int64Value), "32", 32L)] + [TestCase(typeof(UInt32Value), "32", 32U)] + [TestCase(typeof(UInt64Value), "32", 32UL)] + [TestCase(typeof(StringValue), "\"foo\"", "foo")] + [TestCase(typeof(FloatValue), "1.5", 1.5f)] + [TestCase(typeof(DoubleValue), "1.5", 1.5d)] + public void Wrappers_Standalone(System.Type wrapperType, string json, object expectedValue) + { + IMessage parsed = (IMessage) Activator.CreateInstance(wrapperType); + IMessage expected = (IMessage) Activator.CreateInstance(wrapperType); + JsonParser.Default.Merge(parsed, "null"); + Assert.AreEqual(expected, parsed); + + JsonParser.Default.Merge(parsed, json); + expected.Descriptor.Fields[Wrappers.WrapperValueFieldNumber].Accessor.SetValue(expected, expectedValue); + Assert.AreEqual(expected, parsed); + } + + [Test] + public void BytesWrapper_Standalone() + { + ByteString data = ByteString.CopyFrom(1, 2, 3); + // Can't do this with attributes... 
+ var parsed = JsonParser.Default.Parse("\"" + data.ToBase64() + "\""); + var expected = new BytesValue { Value = data }; + Assert.AreEqual(expected, parsed); + } + + [Test] + public void RepeatedWrappers() + { + var message = new RepeatedWellKnownTypes + { + BoolField = { true, false }, + BytesField = { ByteString.CopyFrom(1, 2, 3), ByteString.CopyFrom(4, 5, 6), ByteString.Empty }, + DoubleField = { 12.5, -1.5, 0d }, + FloatField = { 123.25f, -20f, 0f }, + Int32Field = { int.MaxValue, int.MinValue, 0 }, + Int64Field = { long.MaxValue, long.MinValue, 0L }, + StringField = { "First", "Second", "" }, + Uint32Field = { uint.MaxValue, uint.MinValue, 0U }, + Uint64Field = { ulong.MaxValue, ulong.MinValue, 0UL }, + }; + AssertRoundtrip(message); + } + + [Test] + public void IndividualWrapperTypes() + { + Assert.AreEqual(new StringValue { Value = "foo" }, StringValue.Parser.ParseJson("\"foo\"")); + Assert.AreEqual(new Int32Value { Value = 1 }, Int32Value.Parser.ParseJson("1")); + // Can parse strings directly too + Assert.AreEqual(new Int32Value { Value = 1 }, Int32Value.Parser.ParseJson("\"1\"")); + } + + private static void AssertRoundtrip(T message) where T : IMessage, new() + { + var clone = message.Clone(); + var json = message.ToString(); + var parsed = JsonParser.Default.Parse(json); + Assert.AreEqual(clone, parsed); + } + + [Test] + [TestCase("0", 0)] + [TestCase("-0", 0)] // Not entirely clear whether we intend to allow this... + [TestCase("1", 1)] + [TestCase("-1", -1)] + [TestCase("2147483647", 2147483647)] + [TestCase("-2147483648", -2147483648)] + public void StringToInt32_Valid(string jsonValue, int expectedParsedValue) + { + string json = "{ \"singleInt32\": \"" + jsonValue + "\"}"; + var parsed = TestAllTypes.Parser.ParseJson(json); + Assert.AreEqual(expectedParsedValue, parsed.SingleInt32); + } + + [Test] + [TestCase("+0")] + [TestCase("00")] + [TestCase("-00")] + [TestCase("--1")] + [TestCase("+1")] + [TestCase("1.5")] + [TestCase("1e10")] + [TestCase("2147483648")] + [TestCase("-2147483649")] + public void StringToInt32_Invalid(string jsonValue) + { + string json = "{ \"singleInt32\": \"" + jsonValue + "\"}"; + Assert.Throws(() => TestAllTypes.Parser.ParseJson(json)); + } + + [Test] + [TestCase("0", 0U)] + [TestCase("1", 1U)] + [TestCase("4294967295", 4294967295U)] + public void StringToUInt32_Valid(string jsonValue, uint expectedParsedValue) + { + string json = "{ \"singleUint32\": \"" + jsonValue + "\"}"; + var parsed = TestAllTypes.Parser.ParseJson(json); + Assert.AreEqual(expectedParsedValue, parsed.SingleUint32); + } + + // Assume that anything non-bounds-related is covered in the Int32 case + [Test] + [TestCase("-1")] + [TestCase("4294967296")] + public void StringToUInt32_Invalid(string jsonValue) + { + string json = "{ \"singleUint32\": \"" + jsonValue + "\"}"; + Assert.Throws(() => TestAllTypes.Parser.ParseJson(json)); + } + + [Test] + [TestCase("0", 0L)] + [TestCase("1", 1L)] + [TestCase("-1", -1L)] + [TestCase("9223372036854775807", 9223372036854775807)] + [TestCase("-9223372036854775808", -9223372036854775808)] + public void StringToInt64_Valid(string jsonValue, long expectedParsedValue) + { + string json = "{ \"singleInt64\": \"" + jsonValue + "\"}"; + var parsed = TestAllTypes.Parser.ParseJson(json); + Assert.AreEqual(expectedParsedValue, parsed.SingleInt64); + } + + // Assume that anything non-bounds-related is covered in the Int32 case + [Test] + [TestCase("-9223372036854775809")] + [TestCase("9223372036854775808")] + public void StringToInt64_Invalid(string 
jsonValue) + { + string json = "{ \"singleInt64\": \"" + jsonValue + "\"}"; + Assert.Throws(() => TestAllTypes.Parser.ParseJson(json)); + } + + [Test] + [TestCase("0", 0UL)] + [TestCase("1", 1UL)] + [TestCase("18446744073709551615", 18446744073709551615)] + public void StringToUInt64_Valid(string jsonValue, ulong expectedParsedValue) + { + string json = "{ \"singleUint64\": \"" + jsonValue + "\"}"; + var parsed = TestAllTypes.Parser.ParseJson(json); + Assert.AreEqual(expectedParsedValue, parsed.SingleUint64); + } + + // Assume that anything non-bounds-related is covered in the Int32 case + [Test] + [TestCase("-1")] + [TestCase("18446744073709551616")] + public void StringToUInt64_Invalid(string jsonValue) + { + string json = "{ \"singleUint64\": \"" + jsonValue + "\"}"; + Assert.Throws(() => TestAllTypes.Parser.ParseJson(json)); + } + + [Test] + [TestCase("0", 0d)] + [TestCase("1", 1d)] + [TestCase("1.000000", 1d)] + [TestCase("1.0000000000000000000000001", 1d)] // We don't notice that we haven't preserved the exact value + [TestCase("-1", -1d)] + [TestCase("1e1", 10d)] + [TestCase("1e01", 10d)] // Leading decimals are allowed in exponents + [TestCase("1E1", 10d)] // Either case is fine + [TestCase("-1e1", -10d)] + [TestCase("1.5e1", 15d)] + [TestCase("-1.5e1", -15d)] + [TestCase("15e-1", 1.5d)] + [TestCase("-15e-1", -1.5d)] + [TestCase("1.79769e308", 1.79769e308)] + [TestCase("-1.79769e308", -1.79769e308)] + [TestCase("Infinity", double.PositiveInfinity)] + [TestCase("-Infinity", double.NegativeInfinity)] + [TestCase("NaN", double.NaN)] + public void StringToDouble_Valid(string jsonValue, double expectedParsedValue) + { + string json = "{ \"singleDouble\": \"" + jsonValue + "\"}"; + var parsed = TestAllTypes.Parser.ParseJson(json); + Assert.AreEqual(expectedParsedValue, parsed.SingleDouble); + } + + [Test] + [TestCase("1.7977e308")] + [TestCase("-1.7977e308")] + [TestCase("1e309")] + [TestCase("1,0")] + [TestCase("1.0.0")] + [TestCase("+1")] + [TestCase("00")] + [TestCase("--1")] + [TestCase("\u00BD")] // 1/2 as a single Unicode character. Just sanity checking... + public void StringToDouble_Invalid(string jsonValue) + { + string json = "{ \"singleDouble\": \"" + jsonValue + "\"}"; + Assert.Throws(() => TestAllTypes.Parser.ParseJson(json)); + } + + [Test] + [TestCase("0", 0f)] + [TestCase("1", 1f)] + [TestCase("1.000000", 1f)] + [TestCase("-1", -1f)] + [TestCase("3.402823e38", 3.402823e38f)] + [TestCase("-3.402823e38", -3.402823e38f)] + [TestCase("1.5e1", 15f)] + [TestCase("15e-1", 1.5f)] + public void StringToFloat_Valid(string jsonValue, float expectedParsedValue) + { + string json = "{ \"singleFloat\": \"" + jsonValue + "\"}"; + var parsed = TestAllTypes.Parser.ParseJson(json); + Assert.AreEqual(expectedParsedValue, parsed.SingleFloat); + } + + [Test] + [TestCase("3.402824e38")] + [TestCase("-3.402824e38")] + [TestCase("1,0")] + [TestCase("1.0.0")] + [TestCase("+1")] + [TestCase("00")] + [TestCase("--1")] + public void StringToFloat_Invalid(string jsonValue) + { + string json = "{ \"singleFloat\": \"" + jsonValue + "\"}"; + Assert.Throws(() => TestAllTypes.Parser.ParseJson(json)); + } + + [Test] + [TestCase("0", 0)] + [TestCase("-0", 0)] // Not entirely clear whether we intend to allow this... 
+ [TestCase("1", 1)] + [TestCase("-1", -1)] + [TestCase("2147483647", 2147483647)] + [TestCase("-2147483648", -2147483648)] + public void NumberToInt32_Valid(string jsonValue, int expectedParsedValue) + { + string json = "{ \"singleInt32\": " + jsonValue + "}"; + var parsed = TestAllTypes.Parser.ParseJson(json); + Assert.AreEqual(expectedParsedValue, parsed.SingleInt32); + } + + [Test] + [TestCase("+0")] + [TestCase("00")] + [TestCase("-00")] + [TestCase("--1")] + [TestCase("+1")] + [TestCase("1.5", Ignore = true, Reason = "Desired behaviour unclear")] + [TestCase("1e10")] + [TestCase("2147483648")] + [TestCase("-2147483649")] + public void NumberToInt32_Invalid(string jsonValue) + { + string json = "{ \"singleInt32\": " + jsonValue + "}"; + Assert.Throws(() => TestAllTypes.Parser.ParseJson(json)); + } + + [Test] + [TestCase("0", 0U)] + [TestCase("1", 1U)] + [TestCase("4294967295", 4294967295U)] + public void NumberToUInt32_Valid(string jsonValue, uint expectedParsedValue) + { + string json = "{ \"singleUint32\": " + jsonValue + "}"; + var parsed = TestAllTypes.Parser.ParseJson(json); + Assert.AreEqual(expectedParsedValue, parsed.SingleUint32); + } + + // Assume that anything non-bounds-related is covered in the Int32 case + [Test] + [TestCase("-1")] + [TestCase("4294967296")] + public void NumberToUInt32_Invalid(string jsonValue) + { + string json = "{ \"singleUint32\": " + jsonValue + "}"; + Assert.Throws(() => TestAllTypes.Parser.ParseJson(json)); + } + + [Test] + [TestCase("0", 0L)] + [TestCase("1", 1L)] + [TestCase("-1", -1L)] + [TestCase("9223372036854775807", 9223372036854775807, Ignore = true, Reason = "Desired behaviour unclear")] + [TestCase("-9223372036854775808", -9223372036854775808, Ignore = true, Reason = "Desired behaviour unclear")] + public void NumberToInt64_Valid(string jsonValue, long expectedParsedValue) + { + string json = "{ \"singleInt64\": " + jsonValue + "}"; + var parsed = TestAllTypes.Parser.ParseJson(json); + Assert.AreEqual(expectedParsedValue, parsed.SingleInt64); + } + + // Assume that anything non-bounds-related is covered in the Int32 case + [Test] + [TestCase("-9223372036854775809", Ignore = true, Reason = "Desired behaviour unclear")] + [TestCase("9223372036854775808", Ignore = true, Reason = "Desired behaviour unclear")] + public void NumberToInt64_Invalid(string jsonValue) + { + string json = "{ \"singleInt64\": " + jsonValue + "}"; + Assert.Throws(() => TestAllTypes.Parser.ParseJson(json)); + } + + [Test] + [TestCase("0", 0UL)] + [TestCase("1", 1UL)] + [TestCase("18446744073709551615", 18446744073709551615, Ignore = true, Reason = "Desired behaviour unclear")] + public void NumberToUInt64_Valid(string jsonValue, ulong expectedParsedValue) + { + string json = "{ \"singleUint64\": " + jsonValue + "}"; + var parsed = TestAllTypes.Parser.ParseJson(json); + Assert.AreEqual(expectedParsedValue, parsed.SingleUint64); + } + + // Assume that anything non-bounds-related is covered in the Int32 case + [Test] + [TestCase("-1")] + [TestCase("18446744073709551616")] + public void NumberToUInt64_Invalid(string jsonValue) + { + string json = "{ \"singleUint64\": " + jsonValue + "}"; + Assert.Throws(() => TestAllTypes.Parser.ParseJson(json)); + } + + [Test] + [TestCase("0", 0d)] + [TestCase("1", 1d)] + [TestCase("1.000000", 1d)] + [TestCase("1.0000000000000000000000001", 1d)] // We don't notice that we haven't preserved the exact value + [TestCase("-1", -1d)] + [TestCase("1e1", 10d)] + [TestCase("1e01", 10d)] // Leading decimals are allowed in exponents + 
[TestCase("1E1", 10d)] // Either case is fine + [TestCase("-1e1", -10d)] + [TestCase("1.5e1", 15d)] + [TestCase("-1.5e1", -15d)] + [TestCase("15e-1", 1.5d)] + [TestCase("-15e-1", -1.5d)] + [TestCase("1.79769e308", 1.79769e308)] + [TestCase("-1.79769e308", -1.79769e308)] + public void NumberToDouble_Valid(string jsonValue, double expectedParsedValue) + { + string json = "{ \"singleDouble\": " + jsonValue + "}"; + var parsed = TestAllTypes.Parser.ParseJson(json); + Assert.AreEqual(expectedParsedValue, parsed.SingleDouble); + } + + [Test] + [TestCase("1.7977e308", Ignore = true, Reason = "Desired behaviour unclear")] + [TestCase("-1.7977e308", Ignore = true, Reason = "Desired behaviour unclear")] + [TestCase("1e309", Ignore = true, Reason = "Desired behaviour unclear")] + [TestCase("1,0")] + [TestCase("1.0.0")] + [TestCase("+1")] + [TestCase("00")] + [TestCase("--1")] + [TestCase("\u00BD")] // 1/2 as a single Unicode character. Just sanity checking... + public void NumberToDouble_Invalid(string jsonValue) + { + string json = "{ \"singleDouble\": " + jsonValue + "}"; + Assert.Throws(() => TestAllTypes.Parser.ParseJson(json)); + } + + [Test] + [TestCase("0", 0f)] + [TestCase("1", 1f)] + [TestCase("1.000000", 1f)] + [TestCase("-1", -1f)] + [TestCase("3.402823e38", 3.402823e38f)] + [TestCase("-3.402823e38", -3.402823e38f)] + [TestCase("1.5e1", 15f)] + [TestCase("15e-1", 1.5f)] + public void NumberToFloat_Valid(string jsonValue, float expectedParsedValue) + { + string json = "{ \"singleFloat\": " + jsonValue + "}"; + var parsed = TestAllTypes.Parser.ParseJson(json); + Assert.AreEqual(expectedParsedValue, parsed.SingleFloat); + } + + [Test] + [TestCase("3.402824e38")] + [TestCase("-3.402824e38")] + [TestCase("1,0")] + [TestCase("1.0.0")] + [TestCase("+1")] + [TestCase("00")] + [TestCase("--1")] + public void NumberToFloat_Invalid(string jsonValue) + { + string json = "{ \"singleFloat\": " + jsonValue + "}"; + Assert.Throws(() => TestAllTypes.Parser.ParseJson(json)); + } + + // The simplest way of testing that the value has parsed correctly is to reformat it, + // as we trust the formatting. In many cases that will give the same result as the input, + // so in those cases we accept an expectedFormatted value of null. Sometimes the results + // will be different though, due to a different number of digits being provided. 
+ [Test] + // Z offset + [TestCase("2015-10-09T14:46:23.123456789Z", null)] + [TestCase("2015-10-09T14:46:23.123456Z", null)] + [TestCase("2015-10-09T14:46:23.123Z", null)] + [TestCase("2015-10-09T14:46:23Z", null)] + [TestCase("2015-10-09T14:46:23.123456000Z", "2015-10-09T14:46:23.123456Z")] + [TestCase("2015-10-09T14:46:23.1234560Z", "2015-10-09T14:46:23.123456Z")] + [TestCase("2015-10-09T14:46:23.123000000Z", "2015-10-09T14:46:23.123Z")] + [TestCase("2015-10-09T14:46:23.1230Z", "2015-10-09T14:46:23.123Z")] + [TestCase("2015-10-09T14:46:23.00Z", "2015-10-09T14:46:23Z")] + + // +00:00 offset + [TestCase("2015-10-09T14:46:23.123456789+00:00", "2015-10-09T14:46:23.123456789Z")] + [TestCase("2015-10-09T14:46:23.123456+00:00", "2015-10-09T14:46:23.123456Z")] + [TestCase("2015-10-09T14:46:23.123+00:00", "2015-10-09T14:46:23.123Z")] + [TestCase("2015-10-09T14:46:23+00:00", "2015-10-09T14:46:23Z")] + [TestCase("2015-10-09T14:46:23.123456000+00:00", "2015-10-09T14:46:23.123456Z")] + [TestCase("2015-10-09T14:46:23.1234560+00:00", "2015-10-09T14:46:23.123456Z")] + [TestCase("2015-10-09T14:46:23.123000000+00:00", "2015-10-09T14:46:23.123Z")] + [TestCase("2015-10-09T14:46:23.1230+00:00", "2015-10-09T14:46:23.123Z")] + [TestCase("2015-10-09T14:46:23.00+00:00", "2015-10-09T14:46:23Z")] + + // Other offsets (assume by now that the subsecond handling is okay) + [TestCase("2015-10-09T15:46:23.123456789+01:00", "2015-10-09T14:46:23.123456789Z")] + [TestCase("2015-10-09T13:46:23.123456789-01:00", "2015-10-09T14:46:23.123456789Z")] + [TestCase("2015-10-09T15:16:23.123456789+00:30", "2015-10-09T14:46:23.123456789Z")] + [TestCase("2015-10-09T14:16:23.123456789-00:30", "2015-10-09T14:46:23.123456789Z")] + [TestCase("2015-10-09T16:31:23.123456789+01:45", "2015-10-09T14:46:23.123456789Z")] + [TestCase("2015-10-09T13:01:23.123456789-01:45", "2015-10-09T14:46:23.123456789Z")] + [TestCase("2015-10-10T08:46:23.123456789+18:00", "2015-10-09T14:46:23.123456789Z")] + [TestCase("2015-10-08T20:46:23.123456789-18:00", "2015-10-09T14:46:23.123456789Z")] + + // Leap years and min/max + [TestCase("2016-02-29T14:46:23.123456789Z", null)] + [TestCase("2000-02-29T14:46:23.123456789Z", null)] + [TestCase("0001-01-01T00:00:00Z", null)] + [TestCase("9999-12-31T23:59:59.999999999Z", null)] + public void Timestamp_Valid(string jsonValue, string expectedFormatted) + { + expectedFormatted = expectedFormatted ?? 
jsonValue; + string json = "\"" + jsonValue + "\""; + var parsed = Timestamp.Parser.ParseJson(json); + Assert.AreEqual(expectedFormatted, parsed.ToString()); + } + + [Test] + [TestCase("2015-10-09 14:46:23.123456789Z", Description = "No T between date and time")] + [TestCase("2015/10/09T14:46:23.123456789Z", Description = "Wrong date separators")] + [TestCase("2015-10-09T14.46.23.123456789Z", Description = "Wrong time separators")] + [TestCase("2015-10-09T14:46:23,123456789Z", Description = "Wrong fractional second separators (valid ISO-8601 though)")] + [TestCase(" 2015-10-09T14:46:23.123456789Z", Description = "Whitespace at start")] + [TestCase("2015-10-09T14:46:23.123456789Z ", Description = "Whitespace at end")] + [TestCase("2015-10-09T14:46:23.1234567890", Description = "Too many digits")] + [TestCase("2015-10-09T14:46:23.123456789", Description = "No offset")] + [TestCase("2015-13-09T14:46:23.123456789Z", Description = "Invalid month")] + [TestCase("2015-10-32T14:46:23.123456789Z", Description = "Invalid day")] + [TestCase("2015-10-09T24:00:00.000000000Z", Description = "Invalid hour (valid ISO-8601 though)")] + [TestCase("2015-10-09T14:60:23.123456789Z", Description = "Invalid minutes")] + [TestCase("2015-10-09T14:46:60.123456789Z", Description = "Invalid seconds")] + [TestCase("2015-10-09T14:46:23.123456789+18:01", Description = "Offset too large (positive)")] + [TestCase("2015-10-09T14:46:23.123456789-18:01", Description = "Offset too large (negative)")] + [TestCase("2015-10-09T14:46:23.123456789-00:00", Description = "Local offset (-00:00) makes no sense here")] + [TestCase("0001-01-01T00:00:00+00:01", Description = "Value before earliest when offset applied")] + [TestCase("9999-12-31T23:59:59.999999999-00:01", Description = "Value after latest when offset applied")] + [TestCase("2100-02-29T14:46:23.123456789Z", Description = "Feb 29th on a non-leap-year")] + public void Timestamp_Invalid(string jsonValue) + { + string json = "\"" + jsonValue + "\""; + Assert.Throws(() => Timestamp.Parser.ParseJson(json)); + } + + [Test] + public void StructValue_Null() + { + Assert.AreEqual(new Value { NullValue = 0 }, Value.Parser.ParseJson("null")); + } + + [Test] + public void StructValue_String() + { + Assert.AreEqual(new Value { StringValue = "hi" }, Value.Parser.ParseJson("\"hi\"")); + } + + [Test] + public void StructValue_Bool() + { + Assert.AreEqual(new Value { BoolValue = true }, Value.Parser.ParseJson("true")); + Assert.AreEqual(new Value { BoolValue = false }, Value.Parser.ParseJson("false")); + } + + [Test] + public void StructValue_List() + { + Assert.AreEqual(Value.ForList(Value.ForNumber(1), Value.ForString("x")), Value.Parser.ParseJson("[1, \"x\"]")); + } + + [Test] + public void ParseListValue() + { + Assert.AreEqual(new ListValue { Values = { Value.ForNumber(1), Value.ForString("x") } }, ListValue.Parser.ParseJson("[1, \"x\"]")); + } + + [Test] + public void StructValue_Struct() + { + Assert.AreEqual( + Value.ForStruct(new Struct { Fields = { { "x", Value.ForNumber(1) }, { "y", Value.ForString("z") } } }), + Value.Parser.ParseJson("{ \"x\": 1, \"y\": \"z\" }")); + } + + [Test] + public void ParseStruct() + { + Assert.AreEqual(new Struct { Fields = { { "x", Value.ForNumber(1) }, { "y", Value.ForString("z") } } }, + Struct.Parser.ParseJson("{ \"x\": 1, \"y\": \"z\" }")); + } + + // TODO for duration parsing: upper and lower bounds. 
+ // +/- 315576000000 seconds + + [Test] + [TestCase("1.123456789s", null)] + [TestCase("1.123456s", null)] + [TestCase("1.123s", null)] + [TestCase("1.12300s", "1.123s")] + [TestCase("1.12345s", "1.123450s")] + [TestCase("1s", null)] + [TestCase("-1.123456789s", null)] + [TestCase("-1.123456s", null)] + [TestCase("-1.123s", null)] + [TestCase("-1s", null)] + [TestCase("0.123s", null)] + [TestCase("-0.123s", null)] + [TestCase("123456.123s", null)] + [TestCase("-123456.123s", null)] + // Upper and lower bounds + [TestCase("315576000000s", null)] + [TestCase("-315576000000s", null)] + public void Duration_Valid(string jsonValue, string expectedFormatted) + { + expectedFormatted = expectedFormatted ?? jsonValue; + string json = "\"" + jsonValue + "\""; + var parsed = Duration.Parser.ParseJson(json); + Assert.AreEqual(expectedFormatted, parsed.ToString()); + } + + // The simplest way of testing that the value has parsed correctly is to reformat it, + // as we trust the formatting. In many cases that will give the same result as the input, + // so in those cases we accept an expectedFormatted value of null. Sometimes the results + // will be different though, due to a different number of digits being provided. + [Test] + [TestCase("1.1234567890s", Description = "Too many digits")] + [TestCase("1.123456789", Description = "No suffix")] + [TestCase("1.123456789ss", Description = "Too much suffix")] + [TestCase("1.123456789S", Description = "Upper case suffix")] + [TestCase("+1.123456789s", Description = "Leading +")] + [TestCase(".123456789s", Description = "No integer before the fraction")] + [TestCase("1,123456789s", Description = "Comma as decimal separator")] + [TestCase("1x1.123456789s", Description = "Non-digit in integer part")] + [TestCase("1.1x3456789s", Description = "Non-digit in fractional part")] + [TestCase(" 1.123456789s", Description = "Whitespace before fraction")] + [TestCase("1.123456789s ", Description = "Whitespace after value")] + [TestCase("01.123456789s", Description = "Leading zero (positive)")] + [TestCase("-01.123456789s", Description = "Leading zero (negative)")] + [TestCase("--0.123456789s", Description = "Double minus sign")] + // Violate upper/lower bounds in various ways + [TestCase("315576000001s", Description = "Integer part too large")] + [TestCase("315576000000.000000001s", Description = "Integer part is upper bound; non-zero fraction")] + [TestCase("3155760000000s", Description = "Integer part too long (positive)")] + [TestCase("-3155760000000s", Description = "Integer part too long (negative)")] + public void Duration_Invalid(string jsonValue) + { + string json = "\"" + jsonValue + "\""; + Assert.Throws(() => Duration.Parser.ParseJson(json)); + } + + // Not as many tests for field masks as I'd like; more to be added when we have more + // detailed specifications. 
+ + [Test] + [TestCase("")] + [TestCase("foo", "foo")] + [TestCase("foo,bar", "foo", "bar")] + [TestCase("foo.bar", "foo.bar")] + [TestCase("fooBar", "foo_bar")] + [TestCase("fooBar.bazQux", "foo_bar.baz_qux")] + public void FieldMask_Valid(string jsonValue, params string[] expectedPaths) + { + string json = "\"" + jsonValue + "\""; + var parsed = FieldMask.Parser.ParseJson(json); + CollectionAssert.AreEqual(expectedPaths, parsed.Paths); + } + + [Test] + public void DataAfterObject() + { + string json = "{} 10"; + Assert.Throws(() => TestAllTypes.Parser.ParseJson(json)); + } + } +} diff --git a/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs b/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs new file mode 100644 index 0000000000..868d9f752c --- /dev/null +++ b/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs @@ -0,0 +1,352 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#endregion +using NUnit.Framework; +using System; +using System.IO; + +namespace Google.Protobuf +{ + public class JsonTokenizerTest + { + [Test] + public void EmptyObjectValue() + { + AssertTokens("{}", JsonToken.StartObject, JsonToken.EndObject); + } + + [Test] + public void EmptyArrayValue() + { + AssertTokens("[]", JsonToken.StartArray, JsonToken.EndArray); + } + + [Test] + [TestCase("foo", "foo")] + [TestCase("tab\\t", "tab\t")] + [TestCase("line\\nfeed", "line\nfeed")] + [TestCase("carriage\\rreturn", "carriage\rreturn")] + [TestCase("back\\bspace", "back\bspace")] + [TestCase("form\\ffeed", "form\ffeed")] + [TestCase("escaped\\/slash", "escaped/slash")] + [TestCase("escaped\\\\backslash", "escaped\\backslash")] + [TestCase("escaped\\\"quote", "escaped\"quote")] + [TestCase("foo {}[] bar", "foo {}[] bar")] + [TestCase("foo\\u09aFbar", "foo\u09afbar")] // Digits, upper hex, lower hex + [TestCase("ab\ud800\udc00cd", "ab\ud800\udc00cd")] + [TestCase("ab\\ud800\\udc00cd", "ab\ud800\udc00cd")] + public void StringValue(string json, string expectedValue) + { + AssertTokensNoReplacement("\"" + json + "\"", JsonToken.Value(expectedValue)); + } + + // Valid surrogate pairs, with mixed escaping. These test cases can't be expressed + // using TestCase as they have no valid UTF-8 representation. + // It's unclear exactly how we should handle a mixture of escaped or not: that can't + // come from UTF-8 text, but could come from a .NET string. For the moment, + // treat it as valid in the obvious way. + [Test] + public void MixedSurrogatePairs() + { + string expected = "\ud800\udc00"; + AssertTokens("'\\ud800\udc00'", JsonToken.Value(expected)); + AssertTokens("'\ud800\\udc00'", JsonToken.Value(expected)); + } + + [Test] + [TestCase("embedded tab\t")] + [TestCase("embedded CR\r")] + [TestCase("embedded LF\n")] + [TestCase("embedded bell\u0007")] + [TestCase("bad escape\\a")] + [TestCase("incomplete escape\\")] + [TestCase("incomplete Unicode escape\\u000")] + [TestCase("invalid Unicode escape\\u000H")] + // Surrogate pair handling, both in raw .NET strings and escaped. We only need + // to detect this in strings, as non-ASCII characters anywhere other than in strings + // will already lead to parsing errors. + [TestCase("\\ud800")] + [TestCase("\\udc00")] + [TestCase("\\ud800x")] + [TestCase("\\udc00x")] + [TestCase("\\udc00\\ud800y")] + public void InvalidStringValue(string json) + { + AssertThrowsAfter("\"" + json + "\""); + } + + // Tests for invalid strings that can't be expressed in attributes, + // as the constants can't be expressed as UTF-8 strings. + [Test] + public void InvalidSurrogatePairs() + { + AssertThrowsAfter("\"\ud800x\""); + AssertThrowsAfter("\"\udc00y\""); + AssertThrowsAfter("\"\udc00\ud800y\""); + } + + [Test] + [TestCase("0", 0)] + [TestCase("-0", 0)] // We don't distinguish between positive and negative 0 + [TestCase("1", 1)] + [TestCase("-1", -1)] + // From here on, assume leading sign is okay... 
+ [TestCase("1.125", 1.125)] + [TestCase("1.0", 1)] + [TestCase("1e5", 100000)] + [TestCase("1e000000", 1)] // Weird, but not prohibited by the spec + [TestCase("1E5", 100000)] + [TestCase("1e+5", 100000)] + [TestCase("1E-5", 0.00001)] + [TestCase("123E-2", 1.23)] + [TestCase("123.45E3", 123450)] + [TestCase(" 1 ", 1)] + public void NumberValue(string json, double expectedValue) + { + AssertTokens(json, JsonToken.Value(expectedValue)); + } + + [Test] + [TestCase("00")] + [TestCase(".5")] + [TestCase("1.")] + [TestCase("1e")] + [TestCase("1e-")] + [TestCase("--")] + [TestCase("--1")] + [TestCase("-1.7977e308")] + [TestCase("1.7977e308")] + public void InvalidNumberValue(string json) + { + AssertThrowsAfter(json); + } + + [Test] + [TestCase("nul")] + [TestCase("nothing")] + [TestCase("truth")] + [TestCase("fALSEhood")] + public void InvalidLiterals(string json) + { + AssertThrowsAfter(json); + } + + [Test] + public void NullValue() + { + AssertTokens("null", JsonToken.Null); + } + + [Test] + public void TrueValue() + { + AssertTokens("true", JsonToken.True); + } + + [Test] + public void FalseValue() + { + AssertTokens("false", JsonToken.False); + } + + [Test] + public void SimpleObject() + { + AssertTokens("{'x': 'y'}", + JsonToken.StartObject, JsonToken.Name("x"), JsonToken.Value("y"), JsonToken.EndObject); + } + + [Test] + [TestCase("[10, 20", 3)] + [TestCase("[10,", 2)] + [TestCase("[10:20]", 2)] + [TestCase("[", 1)] + [TestCase("[,", 1)] + [TestCase("{", 1)] + [TestCase("{,", 1)] + [TestCase("{", 1)] + [TestCase("{[", 1)] + [TestCase("{{", 1)] + [TestCase("{0", 1)] + [TestCase("{null", 1)] + [TestCase("{false", 1)] + [TestCase("{true", 1)] + [TestCase("}", 0)] + [TestCase("]", 0)] + [TestCase(",", 0)] + [TestCase("'foo' 'bar'", 1)] + [TestCase(":", 0)] + [TestCase("'foo", 0)] // Incomplete string + [TestCase("{ 'foo' }", 2)] + [TestCase("{ x:1", 1)] // Property names must be quoted + [TestCase("{]", 1)] + [TestCase("[}", 1)] + [TestCase("[1,", 2)] + [TestCase("{'x':0]", 3)] + [TestCase("{ 'foo': }", 2)] + [TestCase("{ 'foo':'bar', }", 3)] + public void InvalidStructure(string json, int expectedValidTokens) + { + // Note: we don't test that the earlier tokens are exactly as expected, + // partly because that's hard to parameterize. 
+ var reader = new StringReader(json.Replace('\'', '"')); + var tokenizer = new JsonTokenizer(reader); + for (int i = 0; i < expectedValidTokens; i++) + { + Assert.IsNotNull(tokenizer.Next()); + } + Assert.Throws(() => tokenizer.Next()); + } + + [Test] + public void ArrayMixedType() + { + AssertTokens("[1, 'foo', null, false, true, [2], {'x':'y' }]", + JsonToken.StartArray, + JsonToken.Value(1), + JsonToken.Value("foo"), + JsonToken.Null, + JsonToken.False, + JsonToken.True, + JsonToken.StartArray, + JsonToken.Value(2), + JsonToken.EndArray, + JsonToken.StartObject, + JsonToken.Name("x"), + JsonToken.Value("y"), + JsonToken.EndObject, + JsonToken.EndArray); + } + + [Test] + public void ObjectMixedType() + { + AssertTokens(@"{'a': 1, 'b': 'bar', 'c': null, 'd': false, 'e': true, + 'f': [2], 'g': {'x':'y' }}", + JsonToken.StartObject, + JsonToken.Name("a"), + JsonToken.Value(1), + JsonToken.Name("b"), + JsonToken.Value("bar"), + JsonToken.Name("c"), + JsonToken.Null, + JsonToken.Name("d"), + JsonToken.False, + JsonToken.Name("e"), + JsonToken.True, + JsonToken.Name("f"), + JsonToken.StartArray, + JsonToken.Value(2), + JsonToken.EndArray, + JsonToken.Name("g"), + JsonToken.StartObject, + JsonToken.Name("x"), + JsonToken.Value("y"), + JsonToken.EndObject, + JsonToken.EndObject); + } + + [Test] + public void NextAfterEndDocumentThrows() + { + var tokenizer = new JsonTokenizer(new StringReader("null")); + Assert.AreEqual(JsonToken.Null, tokenizer.Next()); + Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next()); + Assert.Throws(() => tokenizer.Next()); + } + + [Test] + public void CanPushBackEndDocument() + { + var tokenizer = new JsonTokenizer(new StringReader("null")); + Assert.AreEqual(JsonToken.Null, tokenizer.Next()); + Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next()); + tokenizer.PushBack(JsonToken.EndDocument); + Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next()); + Assert.Throws(() => tokenizer.Next()); + } + + /// + /// Asserts that the specified JSON is tokenized into the given sequence of tokens. + /// All apostrophes are first converted to double quotes, allowing any tests + /// that don't need to check actual apostrophe handling to use apostrophes in the JSON, avoiding + /// messy string literal escaping. The "end document" token is not specified in the list of + /// expected tokens, but is implicit. + /// + private static void AssertTokens(string json, params JsonToken[] expectedTokens) + { + AssertTokensNoReplacement(json.Replace('\'', '"'), expectedTokens); + } + + /// + /// Asserts that the specified JSON is tokenized into the given sequence of tokens. + /// Unlike , this does not perform any character + /// replacement on the specified JSON, and should be used when the text contains apostrophes which + /// are expected to be used *as* apostrophes. The "end document" token is not specified in the list of + /// expected tokens, but is implicit. 
+ /// + private static void AssertTokensNoReplacement(string json, params JsonToken[] expectedTokens) + { + var reader = new StringReader(json); + var tokenizer = new JsonTokenizer(reader); + for (int i = 0; i < expectedTokens.Length; i++) + { + var actualToken = tokenizer.Next(); + if (actualToken == JsonToken.EndDocument) + { + Assert.Fail("Expected {0} but reached end of token stream", expectedTokens[i]); + } + Assert.AreEqual(expectedTokens[i], actualToken); + } + var finalToken = tokenizer.Next(); + if (finalToken != JsonToken.EndDocument) + { + Assert.Fail("Expected token stream to be exhausted; received {0}", finalToken); + } + } + + private static void AssertThrowsAfter(string json, params JsonToken[] expectedTokens) + { + var reader = new StringReader(json); + var tokenizer = new JsonTokenizer(reader); + for (int i = 0; i < expectedTokens.Length; i++) + { + var actualToken = tokenizer.Next(); + if (actualToken == JsonToken.EndDocument) + { + Assert.Fail("Expected {0} but reached end of document", expectedTokens[i]); + } + Assert.AreEqual(expectedTokens[i], actualToken); + } + Assert.Throws(() => tokenizer.Next()); + } + } +} diff --git a/csharp/src/Google.Protobuf/FieldCodec.cs b/csharp/src/Google.Protobuf/FieldCodec.cs index 20a1f438f8..54cbc209d5 100644 --- a/csharp/src/Google.Protobuf/FieldCodec.cs +++ b/csharp/src/Google.Protobuf/FieldCodec.cs @@ -30,6 +30,7 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endregion +using Google.Protobuf.WellKnownTypes; using System; using System.Collections.Generic; @@ -261,20 +262,17 @@ namespace Google.Protobuf /// private static class WrapperCodecs { - // All the field numbers are the same (1). - private const int WrapperValueFieldNumber = Google.Protobuf.WellKnownTypes.Int32Value.ValueFieldNumber; - - private static readonly Dictionary Codecs = new Dictionary + private static readonly Dictionary Codecs = new Dictionary { - { typeof(bool), ForBool(WireFormat.MakeTag(WrapperValueFieldNumber, WireFormat.WireType.Varint)) }, - { typeof(int), ForInt32(WireFormat.MakeTag(WrapperValueFieldNumber, WireFormat.WireType.Varint)) }, - { typeof(long), ForInt64(WireFormat.MakeTag(WrapperValueFieldNumber, WireFormat.WireType.Varint)) }, - { typeof(uint), ForUInt32(WireFormat.MakeTag(WrapperValueFieldNumber, WireFormat.WireType.Varint)) }, - { typeof(ulong), ForUInt64(WireFormat.MakeTag(WrapperValueFieldNumber, WireFormat.WireType.Varint)) }, - { typeof(float), ForFloat(WireFormat.MakeTag(WrapperValueFieldNumber, WireFormat.WireType.Fixed32)) }, - { typeof(double), ForDouble(WireFormat.MakeTag(WrapperValueFieldNumber, WireFormat.WireType.Fixed64)) }, - { typeof(string), ForString(WireFormat.MakeTag(WrapperValueFieldNumber, WireFormat.WireType.LengthDelimited)) }, - { typeof(ByteString), ForBytes(WireFormat.MakeTag(WrapperValueFieldNumber, WireFormat.WireType.LengthDelimited)) } + { typeof(bool), ForBool(WireFormat.MakeTag(Wrappers.WrapperValueFieldNumber, WireFormat.WireType.Varint)) }, + { typeof(int), ForInt32(WireFormat.MakeTag(Wrappers.WrapperValueFieldNumber, WireFormat.WireType.Varint)) }, + { typeof(long), ForInt64(WireFormat.MakeTag(Wrappers.WrapperValueFieldNumber, WireFormat.WireType.Varint)) }, + { typeof(uint), ForUInt32(WireFormat.MakeTag(Wrappers.WrapperValueFieldNumber, WireFormat.WireType.Varint)) }, + { typeof(ulong), ForUInt64(WireFormat.MakeTag(Wrappers.WrapperValueFieldNumber, WireFormat.WireType.Varint)) }, + { typeof(float), ForFloat(WireFormat.MakeTag(Wrappers.WrapperValueFieldNumber, 
 WireFormat.WireType.Fixed32)) },
+            { typeof(double), ForDouble(WireFormat.MakeTag(Wrappers.WrapperValueFieldNumber, WireFormat.WireType.Fixed64)) },
+            { typeof(string), ForString(WireFormat.MakeTag(Wrappers.WrapperValueFieldNumber, WireFormat.WireType.LengthDelimited)) },
+            { typeof(ByteString), ForBytes(WireFormat.MakeTag(Wrappers.WrapperValueFieldNumber, WireFormat.WireType.LengthDelimited)) }
         };
 
         /// <summary>
diff --git a/csharp/src/Google.Protobuf/Google.Protobuf.csproj b/csharp/src/Google.Protobuf/Google.Protobuf.csproj
index 8c680d46b3..0039943890 100644
--- a/csharp/src/Google.Protobuf/Google.Protobuf.csproj
+++ b/csharp/src/Google.Protobuf/Google.Protobuf.csproj
@@ -85,6 +85,9 @@
+    <Compile Include="JsonParser.cs" />
+    <Compile Include="JsonToken.cs" />
+    <Compile Include="JsonTokenizer.cs" />
@@ -130,7 +133,9 @@
+
+
diff --git a/csharp/src/Google.Protobuf/JsonFormatter.cs b/csharp/src/Google.Protobuf/JsonFormatter.cs
index 3f9bd47874..2070f62b10 100644
--- a/csharp/src/Google.Protobuf/JsonFormatter.cs
+++ b/csharp/src/Google.Protobuf/JsonFormatter.cs
@@ -189,6 +189,7 @@ namespace Google.Protobuf
         }
 
         // Converted from src/google/protobuf/util/internal/utility.cc ToCamelCase
+        // TODO: Use the new field in FieldDescriptor.
        internal static string ToCamelCase(string input)
        {
            bool capitalizeNext = false;
@@ -382,10 +383,19 @@ namespace Google.Protobuf
                 WriteNull(builder);
                 return;
             }
-            // For wrapper types, the value will be the (possibly boxed) "native" value,
-            // so we can write it as if we were unconditionally writing the Value field for the wrapper type.
+            // For wrapper types, the value will either be the (possibly boxed) "native" value,
+            // or the message itself if we're formatting it at the top level (e.g. just calling ToString on the object itself).
+            // If it's the message form, we can extract the value first, which *will* be the (possibly boxed) native value,
+            // and then proceed, writing it as if we were definitely in a field. (We never need to wrap it in an extra string...
+            // WriteValue will do the right thing.)
+            // TODO: Detect this differently when we have dynamic messages.
             if (descriptor.File == Int32Value.Descriptor.File)
             {
+                if (value is IMessage)
+                {
+                    var message = (IMessage) value;
+                    value = message.Descriptor.Fields[Wrappers.WrapperValueFieldNumber].Accessor.GetValue(message);
+                }
                 WriteValue(builder, value);
                 return;
             }
@@ -750,7 +760,6 @@ namespace Google.Protobuf
 
             private readonly bool formatDefaultValues;
-
             /// <summary>
             /// Whether fields whose values are the default for the field type (e.g. 0 for integers)
             /// should be formatted (true) or omitted (false).
diff --git a/csharp/src/Google.Protobuf/JsonParser.cs b/csharp/src/Google.Protobuf/JsonParser.cs
new file mode 100644
index 0000000000..6d2638d929
--- /dev/null
+++ b/csharp/src/Google.Protobuf/JsonParser.cs
@@ -0,0 +1,813 @@
+#region Copyright notice and license
+// Protocol Buffers - Google's data interchange format
+// Copyright 2015 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#endregion
+
+using Google.Protobuf.Reflection;
+using Google.Protobuf.WellKnownTypes;
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Google.Protobuf
+{
+    /// <summary>
+    /// Reflection-based converter from JSON to messages.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Instances of this class are thread-safe, with no mutable state.
+    /// </para>
+    /// <para>
+    /// This is a simple start to get JSON parsing working. As it's reflection-based,
+    /// it's not as quick as baking calls into generated messages - but is a simpler implementation.
+    /// (This code is generally not heavily optimized.)
+    /// </para>
+    /// </remarks>
+    public sealed class JsonParser
+    {
+        // Note: using 0-9 instead of \d to ensure no non-ASCII digits.
+        // This regex isn't a complete validator, but will remove *most* invalid input. We rely on parsing to do the rest.
+ private static readonly Regex TimestampRegex = new Regex(@"^(?[0-9]{4}-[01][0-9]-[0-3][0-9]T[012][0-9]:[0-5][0-9]:[0-5][0-9])(?\.[0-9]{1,9})?(?(Z|[+-][0-1][0-9]:[0-5][0-9]))$", FrameworkPortability.CompiledRegexWhereAvailable); + private static readonly Regex DurationRegex = new Regex(@"^(?-)?(?[0-9]{1,12})(?\.[0-9]{1,9})?s$", FrameworkPortability.CompiledRegexWhereAvailable); + private static readonly int[] SubsecondScalingFactors = { 0, 100000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1 }; + private static readonly char[] FieldMaskPathSeparators = new[] { ',' }; + + private static readonly JsonParser defaultInstance = new JsonParser(Settings.Default); + + private static readonly Dictionary> + WellKnownTypeHandlers = new Dictionary> + { + { Timestamp.Descriptor.FullName, (parser, message, tokenizer) => MergeTimestamp(message, tokenizer.Next()) }, + { Duration.Descriptor.FullName, (parser, message, tokenizer) => MergeDuration(message, tokenizer.Next()) }, + { Value.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStructValue(message, tokenizer) }, + { ListValue.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeRepeatedField(message, message.Descriptor.Fields[ListValue.ValuesFieldNumber], tokenizer) }, + { Struct.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStruct(message, tokenizer) }, + { FieldMask.Descriptor.FullName, (parser, message, tokenizer) => MergeFieldMask(message, tokenizer.Next()) }, + { Int32Value.Descriptor.FullName, MergeWrapperField }, + { Int64Value.Descriptor.FullName, MergeWrapperField }, + { UInt32Value.Descriptor.FullName, MergeWrapperField }, + { UInt64Value.Descriptor.FullName, MergeWrapperField }, + { FloatValue.Descriptor.FullName, MergeWrapperField }, + { DoubleValue.Descriptor.FullName, MergeWrapperField }, + { BytesValue.Descriptor.FullName, MergeWrapperField }, + { StringValue.Descriptor.FullName, MergeWrapperField } + }; + + // Convenience method to avoid having to repeat the same code multiple times in the above + // dictionary initialization. + private static void MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer) + { + parser.MergeField(message, message.Descriptor.Fields[Wrappers.WrapperValueFieldNumber], tokenizer); + } + + /// + /// Returns a formatter using the default settings. /// + public static JsonParser Default { get { return defaultInstance; } } + +// Currently the settings are unused. +// TODO: When we've implemented Any (and the json spec is finalized), revisit whether they're +// needed at all. +#pragma warning disable 0414 + private readonly Settings settings; +#pragma warning restore 0414 + + /// + /// Creates a new formatted with the given settings. + /// + /// The settings. + public JsonParser(Settings settings) + { + this.settings = settings; + } + + /// + /// Parses and merges the information into the given message. + /// + /// The message to merge the JSON information into. + /// The JSON to parse. + internal void Merge(IMessage message, string json) + { + Merge(message, new StringReader(json)); + } + + /// + /// Parses JSON read from and merges the information into the given message. + /// + /// The message to merge the JSON information into. + /// Reader providing the JSON to parse. 
+ internal void Merge(IMessage message, TextReader jsonReader) + { + var tokenizer = new JsonTokenizer(jsonReader); + Merge(message, tokenizer); + var lastToken = tokenizer.Next(); + if (lastToken != JsonToken.EndDocument) + { + throw new InvalidProtocolBufferException("Expected end of JSON after object"); + } + } + + /// + /// Merges the given message using data from the given tokenizer. In most cases, the next + /// token should be a "start object" token, but wrapper types and nullity can invalidate + /// that assumption. This is implemented as an LL(1) recursive descent parser over the stream + /// of tokens provided by the tokenizer. This token stream is assumed to be valid JSON, with the + /// tokenizer performing that validation - but not every token stream is valid "protobuf JSON". + /// + private void Merge(IMessage message, JsonTokenizer tokenizer) + { + if (message.Descriptor.IsWellKnownType) + { + Action handler; + if (WellKnownTypeHandlers.TryGetValue(message.Descriptor.FullName, out handler)) + { + handler(this, message, tokenizer); + return; + } + // Well-known types with no special handling continue in the normal way. + } + var token = tokenizer.Next(); + if (token.Type != JsonToken.TokenType.StartObject) + { + throw new InvalidProtocolBufferException("Expected an object"); + } + var descriptor = message.Descriptor; + // TODO: Make this more efficient, e.g. by building it once in the descriptor. + // Additionally, we need to consider whether to parse field names in their original proto form, + // and any overrides in the descriptor. But yes, all of this should be in the descriptor somehow... + // the descriptor can expose the dictionary. + var jsonFieldMap = descriptor.Fields.InDeclarationOrder().ToDictionary(field => JsonFormatter.ToCamelCase(field.Name)); + while (true) + { + token = tokenizer.Next(); + if (token.Type == JsonToken.TokenType.EndObject) + { + return; + } + if (token.Type != JsonToken.TokenType.Name) + { + throw new InvalidOperationException("Unexpected token type " + token.Type); + } + string name = token.StringValue; + FieldDescriptor field; + if (jsonFieldMap.TryGetValue(name, out field)) + { + MergeField(message, field, tokenizer); + } + else + { + // TODO: Is this what we want to do? If not, we'll need to skip the value, + // which may be an object or array. (We might want to put code in the tokenizer + // to do that.) + throw new InvalidProtocolBufferException("Unknown field: " + name); + } + } + } + + private void MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer) + { + var token = tokenizer.Next(); + if (token.Type == JsonToken.TokenType.Null) + { + // Note: different from Java API, which just ignores it. + // TODO: Bring it more in line? Discuss... + field.Accessor.Clear(message); + return; + } + tokenizer.PushBack(token); + + if (field.IsMap) + { + MergeMapField(message, field, tokenizer); + } + else if (field.IsRepeated) + { + MergeRepeatedField(message, field, tokenizer); + } + else + { + var value = ParseSingleValue(field, tokenizer); + field.Accessor.SetValue(message, value); + } + } + + private void MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer) + { + var token = tokenizer.Next(); + if (token.Type != JsonToken.TokenType.StartArray) + { + throw new InvalidProtocolBufferException("Repeated field value was not an array. 
Token type: " + token.Type); + } + + IList list = (IList) field.Accessor.GetValue(message); + while (true) + { + token = tokenizer.Next(); + if (token.Type == JsonToken.TokenType.EndArray) + { + return; + } + tokenizer.PushBack(token); + list.Add(ParseSingleValue(field, tokenizer)); + } + } + + private void MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer) + { + // Map fields are always objects, even if the values are well-known types: ParseSingleValue handles those. + var token = tokenizer.Next(); + if (token.Type != JsonToken.TokenType.StartObject) + { + throw new InvalidProtocolBufferException("Expected an object to populate a map"); + } + + var type = field.MessageType; + var keyField = type.FindFieldByNumber(1); + var valueField = type.FindFieldByNumber(2); + if (keyField == null || valueField == null) + { + throw new InvalidProtocolBufferException("Invalid map field: " + field.FullName); + } + IDictionary dictionary = (IDictionary) field.Accessor.GetValue(message); + + while (true) + { + token = tokenizer.Next(); + if (token.Type == JsonToken.TokenType.EndObject) + { + return; + } + object key = ParseMapKey(keyField, token.StringValue); + object value = ParseSingleValue(valueField, tokenizer); + // TODO: Null handling + dictionary[key] = value; + } + } + + private object ParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer) + { + var token = tokenizer.Next(); + if (token.Type == JsonToken.TokenType.Null) + { + if (field.FieldType == FieldType.Message && field.MessageType.FullName == Value.Descriptor.FullName) + { + return new Value { NullValue = NullValue.NULL_VALUE }; + } + return null; + } + + var fieldType = field.FieldType; + if (fieldType == FieldType.Message) + { + // Parse wrapper types as their constituent types. + // TODO: What does this mean for null? + // TODO: Detect this differently when we have dynamic messages, and put it in one place... + if (field.MessageType.IsWellKnownType && field.MessageType.File == Int32Value.Descriptor.File) + { + field = field.MessageType.Fields[Wrappers.WrapperValueFieldNumber]; + fieldType = field.FieldType; + } + else + { + // TODO: Merge the current value in message? (Public API currently doesn't make this relevant as we don't expose merging.) + tokenizer.PushBack(token); + IMessage subMessage = NewMessageForField(field); + Merge(subMessage, tokenizer); + return subMessage; + } + } + + switch (token.Type) + { + case JsonToken.TokenType.True: + case JsonToken.TokenType.False: + if (fieldType == FieldType.Bool) + { + return token.Type == JsonToken.TokenType.True; + } + // Fall through to "we don't support this type for this case"; could duplicate the behaviour of the default + // case instead, but this way we'd only need to change one place. + goto default; + case JsonToken.TokenType.StringValue: + return ParseSingleStringValue(field, token.StringValue); + // Note: not passing the number value itself here, as we may end up storing the string value in the token too. + case JsonToken.TokenType.Number: + return ParseSingleNumberValue(field, token); + case JsonToken.TokenType.Null: + throw new NotImplementedException("Haven't worked out what to do for null yet"); + default: + throw new InvalidProtocolBufferException("Unsupported JSON token type " + token.Type + " for field type " + fieldType); + } + } + + /// + /// Parses into a new message. + /// + /// The type of message to create. + /// The JSON to parse. 
+ public T Parse(string json) where T : IMessage, new() + { + return Parse(new StringReader(json)); + } + + /// + /// Parses JSON read from into a new message. + /// + /// The type of message to create. + /// Reader providing the JSON to parse. + public T Parse(TextReader jsonReader) where T : IMessage, new() + { + T message = new T(); + Merge(message, jsonReader); + return message; + } + + private void MergeStructValue(IMessage message, JsonTokenizer tokenizer) + { + var firstToken = tokenizer.Next(); + var fields = message.Descriptor.Fields; + switch (firstToken.Type) + { + case JsonToken.TokenType.Null: + fields[Value.NullValueFieldNumber].Accessor.SetValue(message, 0); + return; + case JsonToken.TokenType.StringValue: + fields[Value.StringValueFieldNumber].Accessor.SetValue(message, firstToken.StringValue); + return; + case JsonToken.TokenType.Number: + fields[Value.NumberValueFieldNumber].Accessor.SetValue(message, firstToken.NumberValue); + return; + case JsonToken.TokenType.False: + case JsonToken.TokenType.True: + fields[Value.BoolValueFieldNumber].Accessor.SetValue(message, firstToken.Type == JsonToken.TokenType.True); + return; + case JsonToken.TokenType.StartObject: + { + var field = fields[Value.StructValueFieldNumber]; + var structMessage = NewMessageForField(field); + tokenizer.PushBack(firstToken); + Merge(structMessage, tokenizer); + field.Accessor.SetValue(message, structMessage); + return; + } + case JsonToken.TokenType.StartArray: + { + var field = fields[Value.ListValueFieldNumber]; + var list = NewMessageForField(field); + tokenizer.PushBack(firstToken); + Merge(list, tokenizer); + field.Accessor.SetValue(message, list); + return; + } + default: + throw new InvalidOperationException("Unexpected token type: " + firstToken.Type); + } + } + + private void MergeStruct(IMessage message, JsonTokenizer tokenizer) + { + var token = tokenizer.Next(); + if (token.Type != JsonToken.TokenType.StartObject) + { + throw new InvalidProtocolBufferException("Expected object value for Struct"); + } + tokenizer.PushBack(token); + + var field = message.Descriptor.Fields[Struct.FieldsFieldNumber]; + MergeMapField(message, field, tokenizer); + } + + #region Utility methods which don't depend on the state (or settings) of the parser. + private static object ParseMapKey(FieldDescriptor field, string keyText) + { + switch (field.FieldType) + { + case FieldType.Bool: + if (keyText == "true") + { + return true; + } + if (keyText == "false") + { + return false; + } + throw new InvalidProtocolBufferException("Invalid string for bool map key: " + keyText); + case FieldType.String: + return keyText; + case FieldType.Int32: + case FieldType.SInt32: + case FieldType.SFixed32: + return ParseNumericString(keyText, int.Parse, false); + case FieldType.UInt32: + case FieldType.Fixed32: + return ParseNumericString(keyText, uint.Parse, false); + case FieldType.Int64: + case FieldType.SInt64: + case FieldType.SFixed64: + return ParseNumericString(keyText, long.Parse, false); + case FieldType.UInt64: + case FieldType.Fixed64: + return ParseNumericString(keyText, ulong.Parse, false); + default: + throw new InvalidProtocolBufferException("Invalid field type for map: " + field.FieldType); + } + } + + private static object ParseSingleNumberValue(FieldDescriptor field, JsonToken token) + { + double value = token.NumberValue; + checked + { + // TODO: Validate that it's actually an integer, possibly in terms of the textual representation? 
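// Illustrative note (not part of the patch): for an int32 field, a JSON value of 10 converts cleanly; 1e10 overflows and surfaces as InvalidProtocolBufferException via the catch below; 1.5 is currently truncated to 1, which is what the TODO above refers to.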
+ try + { + switch (field.FieldType) + { + case FieldType.Int32: + case FieldType.SInt32: + case FieldType.SFixed32: + return (int) value; + case FieldType.UInt32: + case FieldType.Fixed32: + return (uint) value; + case FieldType.Int64: + case FieldType.SInt64: + case FieldType.SFixed64: + return (long) value; + case FieldType.UInt64: + case FieldType.Fixed64: + return (ulong) value; + case FieldType.Double: + return value; + case FieldType.Float: + if (double.IsNaN(value)) + { + return float.NaN; + } + if (value > float.MaxValue || value < float.MinValue) + { + if (double.IsPositiveInfinity(value)) + { + return float.PositiveInfinity; + } + if (double.IsNegativeInfinity(value)) + { + return float.NegativeInfinity; + } + throw new InvalidProtocolBufferException("Value out of range: " + value); + } + return (float) value; + default: + throw new InvalidProtocolBufferException("Unsupported conversion from JSON number for field type " + field.FieldType); + } + } + catch (OverflowException) + { + throw new InvalidProtocolBufferException("Value out of range: " + value); + } + } + } + + private static object ParseSingleStringValue(FieldDescriptor field, string text) + { + switch (field.FieldType) + { + case FieldType.String: + return text; + case FieldType.Bytes: + return ByteString.FromBase64(text); + case FieldType.Int32: + case FieldType.SInt32: + case FieldType.SFixed32: + return ParseNumericString(text, int.Parse, false); + case FieldType.UInt32: + case FieldType.Fixed32: + return ParseNumericString(text, uint.Parse, false); + case FieldType.Int64: + case FieldType.SInt64: + case FieldType.SFixed64: + return ParseNumericString(text, long.Parse, false); + case FieldType.UInt64: + case FieldType.Fixed64: + return ParseNumericString(text, ulong.Parse, false); + case FieldType.Double: + double d = ParseNumericString(text, double.Parse, true); + // double.Parse can return +/- infinity on Mono for non-infinite values which are out of range for double. + if (double.IsInfinity(d) && !text.Contains("Infinity")) + { + throw new InvalidProtocolBufferException("Invalid numeric value: " + text); + } + return d; + case FieldType.Float: + float f = ParseNumericString(text, float.Parse, true); + // float.Parse can return +/- infinity on Mono for non-infinite values which are out of range for float. + if (float.IsInfinity(f) && !text.Contains("Infinity")) + { + throw new InvalidProtocolBufferException("Invalid numeric value: " + text); + } + return f; + case FieldType.Enum: + var enumValue = field.EnumType.FindValueByName(text); + if (enumValue == null) + { + throw new InvalidProtocolBufferException("Invalid enum value: " + text + " for enum type: " + field.EnumType.FullName); + } + // Just return it as an int, and let the CLR convert it. + return enumValue.Number; + default: + throw new InvalidProtocolBufferException("Unsupported conversion from JSON string for field type " + field.FieldType); + } + } + + /// + /// Creates a new instance of the message type for the given field. + /// This method is mostly extracted so we can replace it in one go when we work out + /// what we want to do instead of Activator.CreateInstance. + /// + private static IMessage NewMessageForField(FieldDescriptor field) + { + // TODO: Create an instance in a better way ? + // (We could potentially add a Parser property to MessageDescriptor... see issue 806.) 
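// Illustrative note (not part of the patch): Activator.CreateInstance below relies on every generated C# message exposing a public parameterless constructor; the top-level Parse entry point relies on its new() constraint for the same reason.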
+ return (IMessage) Activator.CreateInstance(field.MessageType.GeneratedType); + } + + private static T ParseNumericString(string text, Func parser, bool floatingPoint) + { + // TODO: Prohibit leading zeroes (but allow 0!) + // TODO: Validate handling of "Infinity" etc. (Should be case sensitive, no leading whitespace etc) + // Can't prohibit this with NumberStyles. + if (text.StartsWith("+")) + { + throw new InvalidProtocolBufferException("Invalid numeric value: " + text); + } + if (text.StartsWith("0") && text.Length > 1) + { + if (text[1] >= '0' && text[1] <= '9') + { + throw new InvalidProtocolBufferException("Invalid numeric value: " + text); + } + } + else if (text.StartsWith("-0") && text.Length > 2) + { + if (text[2] >= '0' && text[2] <= '9') + { + throw new InvalidProtocolBufferException("Invalid numeric value: " + text); + } + } + try + { + var styles = floatingPoint + ? NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent + : NumberStyles.AllowLeadingSign; + return parser(text, styles, CultureInfo.InvariantCulture); + } + catch (FormatException) + { + throw new InvalidProtocolBufferException("Invalid numeric value for type: " + text); + } + catch (OverflowException) + { + throw new InvalidProtocolBufferException("Value out of range: " + text); + } + } + + private static void MergeTimestamp(IMessage message, JsonToken token) + { + if (token.Type != JsonToken.TokenType.StringValue) + { + throw new InvalidProtocolBufferException("Expected string value for Timestamp"); + } + var match = TimestampRegex.Match(token.StringValue); + if (!match.Success) + { + throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue); + } + var dateTime = match.Groups["datetime"].Value; + var subseconds = match.Groups["subseconds"].Value; + var offset = match.Groups["offset"].Value; + + try + { + DateTime parsed = DateTime.ParseExact( + dateTime, + "yyyy-MM-dd'T'HH:mm:ss", + CultureInfo.InvariantCulture, + DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal); + // TODO: It would be nice not to have to create all these objects... easy to optimize later though. + Timestamp timestamp = Timestamp.FromDateTime(parsed); + int nanosToAdd = 0; + if (subseconds != "") + { + // This should always work, as we've got 1-9 digits. + int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture); + nanosToAdd = parsedFraction * SubsecondScalingFactors[subseconds.Length]; + } + int secondsToAdd = 0; + if (offset != "Z") + { + // This is the amount we need to *subtract* from the local time to get to UTC - hence - => +1 and vice versa. + int sign = offset[0] == '-' ? 1 : -1; + int hours = int.Parse(offset.Substring(1, 2), CultureInfo.InvariantCulture); + int minutes = int.Parse(offset.Substring(4, 2)); + int totalMinutes = hours * 60 + minutes; + if (totalMinutes > 18 * 60) + { + throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue); + } + if (totalMinutes == 0 && sign == 1) + { + // This is an offset of -00:00, which means "unknown local offset". It makes no sense for a timestamp. + throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue); + } + // We need to *subtract* the offset from local time to get UTC. + secondsToAdd = sign * totalMinutes * 60; + } + // Ensure we've got the right signs. Currently unnecessary, but easy to do. 
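// Illustrative examples (not part of the patch): "1970-01-01T00:00:00Z" and "1970-01-01T01:00:00+01:00" both end up as Seconds = 0, Nanos = 0, because the offset is subtracted via secondsToAdd; "1970-01-01T00:00:00-00:00" has already been rejected above as an unknown local offset.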
+ if (secondsToAdd < 0 && nanosToAdd > 0) + { + secondsToAdd++; + nanosToAdd = nanosToAdd - Duration.NanosecondsPerSecond; + } + if (secondsToAdd != 0 || nanosToAdd != 0) + { + timestamp += new Duration { Nanos = nanosToAdd, Seconds = secondsToAdd }; + // The resulting timestamp after offset change would be out of our expected range. Currently the Timestamp message doesn't validate this + // anywhere, but we shouldn't parse it. + if (timestamp.Seconds < Timestamp.UnixSecondsAtBclMinValue || timestamp.Seconds > Timestamp.UnixSecondsAtBclMaxValue) + { + throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue); + } + } + message.Descriptor.Fields[Timestamp.SecondsFieldNumber].Accessor.SetValue(message, timestamp.Seconds); + message.Descriptor.Fields[Timestamp.NanosFieldNumber].Accessor.SetValue(message, timestamp.Nanos); + } + catch (FormatException) + { + throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue); + } + } + + private static void MergeDuration(IMessage message, JsonToken token) + { + if (token.Type != JsonToken.TokenType.StringValue) + { + throw new InvalidProtocolBufferException("Expected string value for Duration"); + } + var match = DurationRegex.Match(token.StringValue); + if (!match.Success) + { + throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue); + } + var sign = match.Groups["sign"].Value; + var secondsText = match.Groups["int"].Value; + // Prohibit leading insignificant zeroes + if (secondsText[0] == '0' && secondsText.Length > 1) + { + throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue); + } + var subseconds = match.Groups["subseconds"].Value; + var multiplier = sign == "-" ? -1 : 1; + + try + { + long seconds = long.Parse(secondsText, CultureInfo.InvariantCulture); + int nanos = 0; + if (subseconds != "") + { + // This should always work, as we've got 1-9 digits. + int parsedFraction = int.Parse(subseconds.Substring(1)); + nanos = parsedFraction * SubsecondScalingFactors[subseconds.Length]; + } + if (seconds >= Duration.MaxSeconds) + { + // Allow precisely 315576000000 seconds, but prohibit even 1ns more. + if (seconds > Duration.MaxSeconds || nanos > 0) + { + throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue); + } + } + message.Descriptor.Fields[Duration.SecondsFieldNumber].Accessor.SetValue(message, seconds * multiplier); + message.Descriptor.Fields[Duration.NanosFieldNumber].Accessor.SetValue(message, nanos * multiplier); + } + catch (FormatException) + { + throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue); + } + } + + private static void MergeFieldMask(IMessage message, JsonToken token) + { + if (token.Type != JsonToken.TokenType.StringValue) + { + throw new InvalidProtocolBufferException("Expected string value for FieldMask"); + } + // TODO: Do we *want* to remove empty entries? Probably okay to treat "" as "no paths", but "foo,,bar"?
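// Illustrative example (not part of the patch): "user.displayName,photo" (the form JsonFormatter emits) becomes the two paths "user.display_name" and "photo" after the camelCase-to-snake_case conversion below.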
+ string[] jsonPaths = token.StringValue.Split(FieldMaskPathSeparators, StringSplitOptions.RemoveEmptyEntries); + IList messagePaths = (IList) message.Descriptor.Fields[FieldMask.PathsFieldNumber].Accessor.GetValue(message); + foreach (var path in jsonPaths) + { + messagePaths.Add(ToSnakeCase(path)); + } + } + + // Ported from src/google/protobuf/util/internal/utility.cc + private static string ToSnakeCase(string text) + { + var builder = new StringBuilder(text.Length * 2); + bool wasNotUnderscore = false; // Initialize to false for case 1 (below) + bool wasNotCap = false; + + for (int i = 0; i < text.Length; i++) + { + char c = text[i]; + if (c >= 'A' && c <= 'Z') // ascii_isupper + { + // Consider when the current character B is capitalized: + // 1) At beginning of input: "B..." => "b..." + // (e.g. "Biscuit" => "biscuit") + // 2) Following a lowercase: "...aB..." => "...a_b..." + // (e.g. "gBike" => "g_bike") + // 3) At the end of input: "...AB" => "...ab" + // (e.g. "GoogleLAB" => "google_lab") + // 4) Followed by a lowercase: "...ABc..." => "...a_bc..." + // (e.g. "GBike" => "g_bike") + if (wasNotUnderscore && // case 1 out + (wasNotCap || // case 2 in, case 3 out + (i + 1 < text.Length && // case 3 out + (text[i + 1] >= 'a' && text[i + 1] <= 'z')))) // ascii_islower(text[i + 1]) + { // case 4 in + // We add an underscore for case 2 and case 4. + builder.Append('_'); + } + // ascii_tolower, but we already know that c *is* an upper case ASCII character... + builder.Append((char) (c + 'a' - 'A')); + wasNotUnderscore = true; + wasNotCap = false; + } + else + { + builder.Append(c); + wasNotUnderscore = c != '_'; + wasNotCap = true; + } + } + return builder.ToString(); + } + #endregion + + /// + /// Settings controlling JSON parsing. (Currently doesn't have any actual settings, but I suspect + /// we'll want them for levels of strictness, descriptor pools for Any handling, etc.) + /// + public sealed class Settings + { + private static readonly Settings defaultInstance = new Settings(); + + // TODO: Add recursion limit. + + /// + /// Default settings, as used by + /// + public static Settings Default { get { return defaultInstance; } } + + /// + /// Creates a new object. + /// + public Settings() + { + } + } + } +} diff --git a/csharp/src/Google.Protobuf/JsonToken.cs b/csharp/src/Google.Protobuf/JsonToken.cs new file mode 100644 index 0000000000..6c0138ccb6 --- /dev/null +++ b/csharp/src/Google.Protobuf/JsonToken.cs @@ -0,0 +1,166 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#endregion + +using System; + +namespace Google.Protobuf +{ + internal sealed class JsonToken : IEquatable + { + // Tokens with no value can be reused. + private static readonly JsonToken _true = new JsonToken(TokenType.True); + private static readonly JsonToken _false = new JsonToken(TokenType.False); + private static readonly JsonToken _null = new JsonToken(TokenType.Null); + private static readonly JsonToken startObject = new JsonToken(TokenType.StartObject); + private static readonly JsonToken endObject = new JsonToken(TokenType.EndObject); + private static readonly JsonToken startArray = new JsonToken(TokenType.StartArray); + private static readonly JsonToken endArray = new JsonToken(TokenType.EndArray); + private static readonly JsonToken endDocument = new JsonToken(TokenType.EndDocument); + + internal static JsonToken Null { get { return _null; } } + internal static JsonToken False { get { return _false; } } + internal static JsonToken True { get { return _true; } } + internal static JsonToken StartObject{ get { return startObject; } } + internal static JsonToken EndObject { get { return endObject; } } + internal static JsonToken StartArray { get { return startArray; } } + internal static JsonToken EndArray { get { return endArray; } } + internal static JsonToken EndDocument { get { return endDocument; } } + + internal static JsonToken Name(string name) + { + return new JsonToken(TokenType.Name, stringValue: name); + } + + internal static JsonToken Value(string value) + { + return new JsonToken(TokenType.StringValue, stringValue: value); + } + + internal static JsonToken Value(double value) + { + return new JsonToken(TokenType.Number, numberValue: value); + } + + internal enum TokenType + { + Null, + False, + True, + StringValue, + Number, + Name, + StartObject, + EndObject, + StartArray, + EndArray, + EndDocument + } + + // A value is a string, number, array, object, null, true or false + // Arrays and objects have start/end + // A document consists of a value + // Objects are name/value sequences. 
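// Illustrative example (not part of the patch): the document { "foo": [1, true] } is delivered by JsonTokenizer as the sequence
// StartObject, Name("foo"), StartArray, Value(1), True, EndArray, EndObject, EndDocument.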
+ + private readonly TokenType type; + private readonly string stringValue; + private readonly double numberValue; + + internal TokenType Type { get { return type; } } + internal string StringValue { get { return stringValue; } } + internal double NumberValue { get { return numberValue; } } + + private JsonToken(TokenType type, string stringValue = null, double numberValue = 0) + { + this.type = type; + this.stringValue = stringValue; + this.numberValue = numberValue; + } + + public override bool Equals(object obj) + { + return Equals(obj as JsonToken); + } + + public override int GetHashCode() + { + unchecked + { + int hash = 17; + hash = hash * 31 + (int) type; + hash = hash * 31 + (stringValue == null ? 0 : stringValue.GetHashCode()); + hash = hash * 31 + numberValue.GetHashCode(); + return hash; + } + } + + public override string ToString() + { + switch (type) + { + case TokenType.Null: + return "null"; + case TokenType.True: + return "true"; + case TokenType.False: + return "false"; + case TokenType.Name: + return "name (" + stringValue + ")"; + case TokenType.StringValue: + return "value (" + stringValue + ")"; + case TokenType.Number: + return "number (" + numberValue + ")"; + case TokenType.StartObject: + return "start-object"; + case TokenType.EndObject: + return "end-object"; + case TokenType.StartArray: + return "start-array"; + case TokenType.EndArray: + return "end-array"; + case TokenType.EndDocument: + return "end-document"; + default: + throw new InvalidOperationException("Token is of unknown type " + type); + } + } + + public bool Equals(JsonToken other) + { + if (ReferenceEquals(other, null)) + { + return false; + } + // Note use of other.numberValue.Equals rather than ==, so that NaN compares appropriately. + return other.type == type && other.stringValue == stringValue && other.numberValue.Equals(numberValue); + } + } +} diff --git a/csharp/src/Google.Protobuf/JsonTokenizer.cs b/csharp/src/Google.Protobuf/JsonTokenizer.cs new file mode 100644 index 0000000000..108ccebe81 --- /dev/null +++ b/csharp/src/Google.Protobuf/JsonTokenizer.cs @@ -0,0 +1,633 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#endregion +using System; +using System.Collections.Generic; +using System.Globalization; +using System.IO; +using System.Text; + +namespace Google.Protobuf +{ + /// + /// Simple but strict JSON tokenizer, rigidly following RFC 7159. + /// + /// + /// + /// This tokenizer is stateful, and only returns "useful" tokens - names, values etc. + /// It does not create tokens for the separator between names and values, or for the comma + /// between values. It validates the token stream as it goes - so callers can assume that the + /// tokens it produces are appropriate. For example, it would never produce "start object, end array." + /// + /// Not thread-safe. + /// + internal sealed class JsonTokenizer + { + // The set of states in which a value is valid next token. + private static readonly State ValueStates = State.ArrayStart | State.ArrayAfterComma | State.ObjectAfterColon | State.StartOfDocument; + + private readonly Stack containerStack = new Stack(); + private readonly PushBackReader reader; + private JsonToken bufferedToken; + private State state; + + internal JsonTokenizer(TextReader reader) + { + this.reader = new PushBackReader(reader); + state = State.StartOfDocument; + containerStack.Push(ContainerType.Document); + } + + internal void PushBack(JsonToken token) + { + if (bufferedToken != null) + { + throw new InvalidOperationException("Can't push back twice"); + } + bufferedToken = token; + } + + /// + /// Returns the next JSON token in the stream. An EndDocument token is returned to indicate the end of the stream, + /// after which point Next() should not be called again. + /// + /// + /// This method essentially just loops through characters skipping whitespace, validating and + /// changing state (e.g. from ObjectBeforeColon to ObjectAfterColon) + /// until it reaches something which will be a genuine token (e.g. a start object, or a value) at which point + /// it returns the token. Although the method is large, it would be relatively hard to break down further... most + /// of it is the large switch statement, which sometimes returns and sometimes doesn't. + /// + /// The next token in the stream. This is never null. 
+ /// This method is called after an EndDocument token has been returned + internal JsonToken Next() + { + if (bufferedToken != null) + { + var ret = bufferedToken; + bufferedToken = null; + return ret; + } + if (state == State.ReaderExhausted) + { + throw new InvalidOperationException("Next() called after end of document"); + } + while (true) + { + var next = reader.Read(); + if (next == null) + { + ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: "); + state = State.ReaderExhausted; + return JsonToken.EndDocument; + } + switch (next.Value) + { + // Skip whitespace between tokens + case ' ': + case '\t': + case '\r': + case '\n': + break; + case ':': + ValidateState(State.ObjectBeforeColon, "Invalid state to read a colon: "); + state = State.ObjectAfterColon; + break; + case ',': + ValidateState(State.ObjectAfterProperty | State.ArrayAfterValue, "Invalid state to read a colon: "); + state = state == State.ObjectAfterProperty ? State.ObjectAfterComma : State.ArrayAfterComma; + break; + case '"': + string stringValue = ReadString(); + if ((state & (State.ObjectStart | State.ObjectAfterComma)) != 0) + { + state = State.ObjectBeforeColon; + return JsonToken.Name(stringValue); + } + else + { + ValidateAndModifyStateForValue("Invalid state to read a double quote: "); + return JsonToken.Value(stringValue); + } + case '{': + ValidateState(ValueStates, "Invalid state to read an open brace: "); + state = State.ObjectStart; + containerStack.Push(ContainerType.Object); + return JsonToken.StartObject; + case '}': + ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: "); + PopContainer(); + return JsonToken.EndObject; + case '[': + ValidateState(ValueStates, "Invalid state to read an open square bracket: "); + state = State.ArrayStart; + containerStack.Push(ContainerType.Array); + return JsonToken.StartArray; + case ']': + ValidateState(State.ArrayAfterValue | State.ArrayStart, "Invalid state to read a close square bracket: "); + PopContainer(); + return JsonToken.EndArray; + case 'n': // Start of null + ConsumeLiteral("null"); + ValidateAndModifyStateForValue("Invalid state to read a null literal: "); + return JsonToken.Null; + case 't': // Start of true + ConsumeLiteral("true"); + ValidateAndModifyStateForValue("Invalid state to read a true literal: "); + return JsonToken.True; + case 'f': // Start of false + ConsumeLiteral("false"); + ValidateAndModifyStateForValue("Invalid state to read a false literal: "); + return JsonToken.False; + case '-': // Start of a number + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + double number = ReadNumber(next.Value); + ValidateAndModifyStateForValue("Invalid state to read a number token: "); + return JsonToken.Value(number); + default: + throw new InvalidProtocolBufferException("Invalid first character of token: " + next.Value); + } + } + } + + private void ValidateState(State validStates, string errorPrefix) + { + if ((validStates & state) == 0) + { + throw new InvalidProtocolBufferException(errorPrefix + state); + } + } + + /// + /// Reads a string token. It is assumed that the opening " has already been read. 
+ /// + private string ReadString() + { + var value = new StringBuilder(); + bool haveHighSurrogate = false; + while (true) + { + char c = reader.ReadOrFail("Unexpected end of text while reading string"); + if (c < ' ') + { + throw new InvalidProtocolBufferException(string.Format(CultureInfo.InvariantCulture, "Invalid character in string literal: U+{0:x4}", (int) c)); + } + if (c == '"') + { + if (haveHighSurrogate) + { + throw new InvalidProtocolBufferException("Invalid use of surrogate pair code units"); + } + return value.ToString(); + } + if (c == '\\') + { + c = ReadEscapedCharacter(); + } + // TODO: Consider only allowing surrogate pairs that are either both escaped, + // or both not escaped. It would be a very odd text stream that contained a "lone" high surrogate + // followed by an escaped low surrogate or vice versa... and that couldn't even be represented in UTF-8. + if (haveHighSurrogate != char.IsLowSurrogate(c)) + { + throw new InvalidProtocolBufferException("Invalid use of surrogate pair code units"); + } + haveHighSurrogate = char.IsHighSurrogate(c); + value.Append(c); + } + } + + /// + /// Reads an escaped character. It is assumed that the leading backslash has already been read. + /// + private char ReadEscapedCharacter() + { + char c = reader.ReadOrFail("Unexpected end of text while reading character escape sequence"); + switch (c) + { + case 'n': + return '\n'; + case '\\': + return '\\'; + case 'b': + return '\b'; + case 'f': + return '\f'; + case 'r': + return '\r'; + case 't': + return '\t'; + case '"': + return '"'; + case '/': + return '/'; + case 'u': + return ReadUnicodeEscape(); + default: + throw new InvalidProtocolBufferException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) c)); + } + } + + /// + /// Reads an escaped Unicode 4-nybble hex sequence. It is assumed that the leading \u has already been read. + /// + private char ReadUnicodeEscape() + { + int result = 0; + for (int i = 0; i < 4; i++) + { + char c = reader.ReadOrFail("Unexpected end of text while reading Unicode escape sequence"); + int nybble; + if (c >= '0' && c <= '9') + { + nybble = c - '0'; + } + else if (c >= 'a' && c <= 'f') + { + nybble = c - 'a' + 10; + } + else if (c >= 'A' && c <= 'F') + { + nybble = c - 'A' + 10; + } + else + { + throw new InvalidProtocolBufferException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) c)); + } + result = (result << 4) + nybble; + } + return (char) result; + } + + /// + /// Consumes a text-only literal, throwing an exception if the read text doesn't match it. + /// It is assumed that the first letter of the literal has already been read. + /// + private void ConsumeLiteral(string text) + { + for (int i = 1; i < text.Length; i++) + { + char? next = reader.Read(); + if (next == null) + { + throw new InvalidProtocolBufferException("Unexpected end of text while reading literal token " + text); + } + if (next.Value != text[i]) + { + throw new InvalidProtocolBufferException("Unexpected character while reading literal token " + text); + } + } + } + + private double ReadNumber(char initialCharacter) + { + StringBuilder builder = new StringBuilder(); + if (initialCharacter == '-') + { + builder.Append("-"); + } + else + { + reader.PushBack(initialCharacter); + } + // Each method returns the character it read that doesn't belong in that part, + // so we know what to do next, including pushing the character back at the end. 
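// Illustrative example (not part of the patch): for the input -12.5e3, ReadInt consumes "12", ReadFrac consumes ".5", ReadExp consumes "e3" (appending it as "E3"), and whatever character follows is pushed back for the next token.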
+ // null is returned for "end of text". + char? next = ReadInt(builder); + if (next == '.') + { + next = ReadFrac(builder); + } + if (next == 'e' || next == 'E') + { + next = ReadExp(builder); + } + // If we read a character which wasn't part of the number, push it back so we can read it again + // to parse the next token. + if (next != null) + { + reader.PushBack(next.Value); + } + + // TODO: What exception should we throw if the value can't be represented as a double? + try + { + return double.Parse(builder.ToString(), + NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, + CultureInfo.InvariantCulture); + } + catch (OverflowException) + { + throw new InvalidProtocolBufferException("Numeric value out of range: " + builder); + } + } + + private char? ReadInt(StringBuilder builder) + { + char first = reader.ReadOrFail("Invalid numeric literal"); + if (first < '0' || first > '9') + { + throw new InvalidProtocolBufferException("Invalid numeric literal"); + } + builder.Append(first); + int digitCount; + char? next = ConsumeDigits(builder, out digitCount); + if (first == '0' && digitCount != 0) + { + throw new InvalidProtocolBufferException("Invalid numeric literal: leading 0 for non-zero value."); + } + return next; + } + + private char? ReadFrac(StringBuilder builder) + { + builder.Append('.'); // Already consumed this + int digitCount; + char? next = ConsumeDigits(builder, out digitCount); + if (digitCount == 0) + { + throw new InvalidProtocolBufferException("Invalid numeric literal: fraction with no trailing digits"); + } + return next; + } + + private char? ReadExp(StringBuilder builder) + { + builder.Append('E'); // Already consumed this (or 'e') + char? next = reader.Read(); + if (next == null) + { + throw new InvalidProtocolBufferException("Invalid numeric literal: exponent with no trailing digits"); + } + if (next == '-' || next == '+') + { + builder.Append(next.Value); + } + else + { + reader.PushBack(next.Value); + } + int digitCount; + next = ConsumeDigits(builder, out digitCount); + if (digitCount == 0) + { + throw new InvalidProtocolBufferException("Invalid numeric literal: exponent without value"); + } + return next; + } + + private char? ConsumeDigits(StringBuilder builder, out int count) + { + count = 0; + while (true) + { + char? next = reader.Read(); + if (next == null || next.Value < '0' || next.Value > '9') + { + return next; + } + count++; + builder.Append(next.Value); + } + } + + /// + /// Validates that we're in a valid state to read a value (using the given error prefix if necessary) + /// and changes the state to the appropriate one, e.g. ObjectAfterColon to ObjectAfterProperty. + /// + private void ValidateAndModifyStateForValue(string errorPrefix) + { + ValidateState(ValueStates, errorPrefix); + switch (state) + { + case State.StartOfDocument: + state = State.ExpectedEndOfDocument; + return; + case State.ObjectAfterColon: + state = State.ObjectAfterProperty; + return; + case State.ArrayStart: + case State.ArrayAfterComma: + state = State.ArrayAfterValue; + return; + default: + throw new InvalidOperationException("ValidateAndModifyStateForValue does not handle all value states (and should)"); + } + } + + /// + /// Pops the top-most container, and sets the state to the appropriate one for the end of a value + /// in the parent container. 
+ /// + private void PopContainer() + { + containerStack.Pop(); + var parent = containerStack.Peek(); + switch (parent) + { + case ContainerType.Object: + state = State.ObjectAfterProperty; + break; + case ContainerType.Array: + state = State.ArrayAfterValue; + break; + case ContainerType.Document: + state = State.ExpectedEndOfDocument; + break; + default: + throw new InvalidOperationException("Unexpected container type: " + parent); + } + } + + private enum ContainerType + { + Document, Object, Array + } + + /// + /// Possible states of the tokenizer. + /// + /// + /// This is a flags enum purely so we can simply and efficiently represent a set of valid states + /// for checking. + /// + /// Each is documented with an example, + /// where ^ represents the current position within the text stream. The examples all use string values, + /// but could be any value, including nested objects/arrays. + /// The complete state of the tokenizer also includes a stack to indicate the contexts (arrays/objects). + /// Any additional notional state of "AfterValue" indicates that a value has been completed, at which + /// point there's an immediate transition to ExpectedEndOfDocument, ObjectAfterProperty or ArrayAfterValue. + /// + /// + /// These states were derived manually by reading RFC 7159 carefully. + /// + /// + [Flags] + private enum State + { + /// + /// ^ { "foo": "bar" } + /// Before the value in a document. Next states: ObjectStart, ArrayStart, "AfterValue" + /// + StartOfDocument = 1 << 0, + /// + /// { "foo": "bar" } ^ + /// After the value in a document. Next states: ReaderExhausted + /// + ExpectedEndOfDocument = 1 << 1, + /// + /// { "foo": "bar" } ^ (and already read to the end of the reader) + /// Terminal state. + /// + ReaderExhausted = 1 << 2, + /// + /// { ^ "foo": "bar" } + /// Before the *first* property in an object. + /// Next states: + /// "AfterValue" (empty object) + /// ObjectBeforeColon (read a name) + /// + ObjectStart = 1 << 3, + /// + /// { "foo" ^ : "bar", "x": "y" } + /// Next state: ObjectAfterColon + /// + ObjectBeforeColon = 1 << 4, + /// + /// { "foo" : ^ "bar", "x": "y" } + /// Before any property other than the first in an object. + /// (Equivalently: after any property in an object) + /// Next states: + /// "AfterValue" (value is simple) + /// ObjectStart (value is object) + /// ArrayStart (value is array) + /// + ObjectAfterColon = 1 << 5, + /// + /// { "foo" : "bar" ^ , "x" : "y" } + /// At the end of a property, so expecting either a comma or end-of-object + /// Next states: ObjectAfterComma or "AfterValue" + /// + ObjectAfterProperty = 1 << 6, + /// + /// { "foo":"bar", ^ "x":"y" } + /// Read the comma after the previous property, so expecting another property. + /// This is like ObjectStart, but closing brace isn't valid here + /// Next state: ObjectBeforeColon. + /// + ObjectAfterComma = 1 << 7, + /// + /// [ ^ "foo", "bar" ] + /// Before the *first* value in an array. + /// Next states: + /// "AfterValue" (read a value) + /// "AfterValue" (end of array; will pop stack) + /// + ArrayStart = 1 << 8, + /// + /// [ "foo" ^ , "bar" ] + /// After any value in an array, so expecting either a comma or end-of-array + /// Next states: ArrayAfterComma or "AfterValue" + /// + ArrayAfterValue = 1 << 9, + /// + /// [ "foo", ^ "bar" ] + /// After a comma in an array, so there *must* be another value (simple or complex). 
+ /// Next states: "AfterValue" (simple value), StartObject, StartArray + /// + ArrayAfterComma = 1 << 10 + } + + /// + /// Wrapper around a text reader allowing small amounts of buffering and location handling. + /// + private class PushBackReader + { + // TODO: Add locations for errors etc. + + private readonly TextReader reader; + + internal PushBackReader(TextReader reader) + { + // TODO: Wrap the reader in a BufferedReader? + this.reader = reader; + } + + /// + /// The buffered next character, if we have one. + /// + private char? nextChar; + + /// + /// Returns the next character in the stream, or null if we have reached the end. + /// + /// + internal char? Read() + { + if (nextChar != null) + { + char? tmp = nextChar; + nextChar = null; + return tmp; + } + int next = reader.Read(); + return next == -1 ? null : (char?) next; + } + + internal char ReadOrFail(string messageOnFailure) + { + char? next = Read(); + if (next == null) + { + throw new InvalidProtocolBufferException(messageOnFailure); + } + return next.Value; + } + + internal void PushBack(char c) + { + if (nextChar != null) + { + throw new InvalidOperationException("Cannot push back when already buffering a character"); + } + nextChar = c; + } + } + } +} diff --git a/csharp/src/Google.Protobuf/MessageParser.cs b/csharp/src/Google.Protobuf/MessageParser.cs index 6a6f101749..70c52ba6e0 100644 --- a/csharp/src/Google.Protobuf/MessageParser.cs +++ b/csharp/src/Google.Protobuf/MessageParser.cs @@ -142,5 +142,17 @@ namespace Google.Protobuf message.MergeFrom(input); return message; } + + /// + /// Parses a message from the given JSON. + /// + /// The JSON to parse. + /// The parsed message. + public T ParseJson(string json) + { + T message = factory(); + JsonParser.Default.Merge(message, json); + return message; + } } } diff --git a/csharp/src/Google.Protobuf/Properties/AssemblyInfo.cs b/csharp/src/Google.Protobuf/Properties/AssemblyInfo.cs index 9a484ad66a..225ac0ddfd 100644 --- a/csharp/src/Google.Protobuf/Properties/AssemblyInfo.cs +++ b/csharp/src/Google.Protobuf/Properties/AssemblyInfo.cs @@ -46,7 +46,10 @@ using System.Security; [assembly: AssemblyCopyright("Copyright © 2015")] [assembly: AssemblyTrademark("")] [assembly: AssemblyCulture("")] + +#if !NCRUNCH [assembly: AllowPartiallyTrustedCallers] +#endif #if SIGNED [assembly: InternalsVisibleTo("Google.Protobuf.Test, PublicKey=" + diff --git a/csharp/src/Google.Protobuf/WellKnownTypes/DurationPartial.cs b/csharp/src/Google.Protobuf/WellKnownTypes/DurationPartial.cs index 18ebefd263..324f48fc05 100644 --- a/csharp/src/Google.Protobuf/WellKnownTypes/DurationPartial.cs +++ b/csharp/src/Google.Protobuf/WellKnownTypes/DurationPartial.cs @@ -47,6 +47,16 @@ namespace Google.Protobuf.WellKnownTypes /// public const int NanosecondsPerTick = 100; + /// + /// The maximum permitted number of seconds. + /// + public const long MaxSeconds = 315576000000L; + + /// + /// The minimum permitted number of seconds. + /// + public const long MinSeconds = -315576000000L; + /// /// Converts this to a . 
/// diff --git a/csharp/src/Google.Protobuf/WellKnownTypes/TimestampPartial.cs b/csharp/src/Google.Protobuf/WellKnownTypes/TimestampPartial.cs index 6858435820..d284acd63f 100644 --- a/csharp/src/Google.Protobuf/WellKnownTypes/TimestampPartial.cs +++ b/csharp/src/Google.Protobuf/WellKnownTypes/TimestampPartial.cs @@ -38,6 +38,8 @@ namespace Google.Protobuf.WellKnownTypes { private static readonly DateTime UnixEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc); private static readonly long BclSecondsAtUnixEpoch = UnixEpoch.Ticks / TimeSpan.TicksPerSecond; + internal static readonly long UnixSecondsAtBclMinValue = -BclSecondsAtUnixEpoch; + internal static readonly long UnixSecondsAtBclMaxValue = (DateTime.MaxValue.Ticks / TimeSpan.TicksPerSecond) - BclSecondsAtUnixEpoch; /// /// Returns the difference between one and another, as a . diff --git a/csharp/src/Google.Protobuf/WellKnownTypes/ValuePartial.cs b/csharp/src/Google.Protobuf/WellKnownTypes/ValuePartial.cs new file mode 100644 index 0000000000..b4b6c05e87 --- /dev/null +++ b/csharp/src/Google.Protobuf/WellKnownTypes/ValuePartial.cs @@ -0,0 +1,99 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#endregion + +namespace Google.Protobuf.WellKnownTypes +{ + public partial class Value + { + /// + /// Convenience method to create a Value message with a string value. + /// + /// Value to set for the StringValue property. + /// A newly-created Value message with the given value. + public static Value ForString(string value) + { + Preconditions.CheckNotNull(value, "value"); + return new Value { StringValue = value }; + } + + /// + /// Convenience method to create a Value message with a number value. + /// + /// Value to set for the NumberValue property. + /// A newly-created Value message with the given value. 
+ public static Value ForNumber(double value) + { + return new Value { NumberValue = value }; + } + + /// + /// Convenience method to create a Value message with a Boolean value. + /// + /// Value to set for the BoolValue property. + /// A newly-created Value message with the given value. + public static Value ForBool(bool value) + { + return new Value { BoolValue = value }; + } + + /// + /// Convenience method to create a Value message with a null initial value. + /// + /// A newly-created Value message with a null initial value. + public static Value ForNull() + { + return new Value { NullValue = 0 }; + } + + /// + /// Convenience method to create a Value message with an initial list of values. + /// + /// The values provided are not cloned; the references are copied directly. + /// A newly-created Value message with an initial list value. + public static Value ForList(params Value[] values) + { + Preconditions.CheckNotNull(values, "values"); + return new Value { ListValue = new ListValue { Values = { values } } }; + } + + /// + /// Convenience method to create a Value message with an initial struct value. + /// + /// The value provided is not cloned; the reference is copied directly. + /// A newly-created Value message with an initial struct value. + public static Value ForStruct(Struct value) + { + Preconditions.CheckNotNull(value, "value"); + return new Value { StructValue = value }; + } + } +} diff --git a/csharp/src/Google.Protobuf/WellKnownTypes/WrappersPartial.cs b/csharp/src/Google.Protobuf/WellKnownTypes/WrappersPartial.cs new file mode 100644 index 0000000000..efc13a01e8 --- /dev/null +++ b/csharp/src/Google.Protobuf/WellKnownTypes/WrappersPartial.cs @@ -0,0 +1,42 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#endregion + +namespace Google.Protobuf.WellKnownTypes +{ + public static partial class Wrappers + { + /// + /// Field number for the single "value" field in all wrapper types. + /// + internal const int WrapperValueFieldNumber = Int32Value.ValueFieldNumber; + } +}
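Taken together, the new public surface can be exercised roughly as in the sketch below. This is illustrative only, not part of the patch: it assumes the well-known-type handlers registered earlier in JsonParser.cs, and it uses the existing TestAllTypes/SampleMessages test protos rather than anything added here.

    using System;
    using Google.Protobuf;
    using Google.Protobuf.TestProtos;
    using Google.Protobuf.WellKnownTypes;

    public static class JsonParsingSketch
    {
        public static void Main()
        {
            // Round-trip a fully-populated message through the formatter and the new parser.
            var original = SampleMessages.CreateFullTestAllTypes();
            string json = JsonFormatter.Default.Format(original);
            TestAllTypes parsed = JsonParser.Default.Parse<TestAllTypes>(json);
            Console.WriteLine(parsed.Equals(original)); // True if the round-trip preserved everything.

            // The same functionality is exposed on MessageParser via the new ParseJson method.
            TestAllTypes viaMessageParser = TestAllTypes.Parser.ParseJson(json);

            // Well-known types are parsed from their special JSON representations.
            Duration duration = JsonParser.Default.Parse<Duration>("\"-1.5s\"");
            Struct data = JsonParser.Default.Parse<Struct>("{ \"name\": \"foo\", \"tags\": [1, true, null] }");
        }
    }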