From 567579b50517e4f7efc459ab1d9d5ee2577af024 Mon Sep 17 00:00:00 2001 From: Jon Skeet Date: Mon, 23 Nov 2015 12:43:54 +0000 Subject: [PATCH 1/2] JSON formatting for Any. --- .../Google.Protobuf.Test/JsonFormatterTest.cs | 42 +++++++ csharp/src/Google.Protobuf/JsonFormatter.cs | 111 +++++++++++++++--- 2 files changed, 135 insertions(+), 18 deletions(-) diff --git a/csharp/src/Google.Protobuf.Test/JsonFormatterTest.cs b/csharp/src/Google.Protobuf.Test/JsonFormatterTest.cs index 08cedad8fe..8473b4be58 100644 --- a/csharp/src/Google.Protobuf.Test/JsonFormatterTest.cs +++ b/csharp/src/Google.Protobuf.Test/JsonFormatterTest.cs @@ -35,6 +35,7 @@ using Google.Protobuf.TestProtos; using NUnit.Framework; using UnitTest.Issues.TestProtos; using Google.Protobuf.WellKnownTypes; +using Google.Protobuf.Reflection; namespace Google.Protobuf { @@ -420,6 +421,47 @@ namespace Google.Protobuf AssertJson("{ 'fileName': 'foo.proto' }", JsonFormatter.Default.Format(message)); } + [Test] + public void AnyWellKnownType() + { + var formatter = new JsonFormatter(new JsonFormatter.Settings(false, TypeRegistry.FromMessages(Timestamp.Descriptor))); + var timestamp = new DateTime(1673, 6, 19, 12, 34, 56, DateTimeKind.Utc).ToTimestamp(); + var any = Any.Pack(timestamp); + AssertJson("{ '@type': 'type.googleapis.com/google.protobuf.Timestamp', 'value': '1673-06-19T12:34:56Z' }", formatter.Format(any)); + } + + [Test] + public void AnyMessageType() + { + var formatter = new JsonFormatter(new JsonFormatter.Settings(false, TypeRegistry.FromMessages(TestAllTypes.Descriptor))); + var message = new TestAllTypes { SingleInt32 = 10, SingleNestedMessage = new TestAllTypes.Types.NestedMessage { Bb = 20 } }; + var any = Any.Pack(message); + AssertJson("{ '@type': 'type.googleapis.com/protobuf_unittest.TestAllTypes', 'singleInt32': 10, 'singleNestedMessage': { 'bb': 20 } }", formatter.Format(any)); + } + + [Test] + public void AnyNested() + { + var registry = TypeRegistry.FromMessages(TestWellKnownTypes.Descriptor, TestAllTypes.Descriptor); + var formatter = new JsonFormatter(new JsonFormatter.Settings(false, registry)); + + // Nest an Any as the value of an Any. + var doubleNestedMessage = new TestAllTypes { SingleInt32 = 20 }; + var nestedMessage = Any.Pack(doubleNestedMessage); + var message = new TestWellKnownTypes { AnyField = Any.Pack(nestedMessage) }; + AssertJson("{ 'anyField': { '@type': 'type.googleapis.com/google.protobuf.Any', 'value': { '@type': 'type.googleapis.com/protobuf_unittest.TestAllTypes', 'singleInt32': 20 } } }", + formatter.Format(message)); + } + + [Test] + public void AnyUnknownType() + { + // The default type registry doesn't have any types in it. + var message = new TestAllTypes(); + var any = Any.Pack(message); + Assert.Throws(() => JsonFormatter.Default.Format(any)); + } + /// /// Checks that the actual JSON is the same as the expected JSON - but after replacing /// all apostrophes in the expected JSON with double quotes. This basically makes the tests easier diff --git a/csharp/src/Google.Protobuf/JsonFormatter.cs b/csharp/src/Google.Protobuf/JsonFormatter.cs index 51bb4bf37e..c7d392cd51 100644 --- a/csharp/src/Google.Protobuf/JsonFormatter.cs +++ b/csharp/src/Google.Protobuf/JsonFormatter.cs @@ -55,6 +55,12 @@ namespace Google.Protobuf /// public sealed class JsonFormatter { + internal const string AnyTypeUrlField = "@type"; + internal const string AnyWellKnownTypeValueField = "value"; + private const string TypeUrlPrefix = "type.googleapis.com"; + private const string NameValueSeparator = ": "; + private const string PropertySeparator = ", "; + private static JsonFormatter defaultInstance = new JsonFormatter(Settings.Default); /// @@ -130,7 +136,7 @@ namespace Google.Protobuf /// The formatted message. public string Format(IMessage message) { - Preconditions.CheckNotNull(message, "message"); + Preconditions.CheckNotNull(message, nameof(message)); StringBuilder builder = new StringBuilder(); if (message.Descriptor.IsWellKnownType) { @@ -151,13 +157,18 @@ namespace Google.Protobuf return; } builder.Append("{ "); + bool writtenFields = WriteMessageFields(builder, message, false); + builder.Append(writtenFields ? " }" : "}"); + } + + private bool WriteMessageFields(StringBuilder builder, IMessage message, bool assumeFirstFieldWritten) + { var fields = message.Descriptor.Fields; - bool first = true; + bool first = !assumeFirstFieldWritten; // First non-oneof fields foreach (var field in fields.InFieldNumberOrder()) { var accessor = field.Accessor; - // Oneofs are written later if (field.ContainingOneof != null && field.ContainingOneof.Accessor.GetCaseFieldDescriptor(message) != field) { continue; @@ -178,14 +189,14 @@ namespace Google.Protobuf // Okay, all tests complete: let's write the field value... if (!first) { - builder.Append(", "); + builder.Append(PropertySeparator); } WriteString(builder, ToCamelCase(accessor.Descriptor.Name)); - builder.Append(": "); + builder.Append(NameValueSeparator); WriteValue(builder, value); first = false; } - builder.Append(first ? "}" : " }"); + return !first; } // Converted from src/google/protobuf/util/internal/utility.cc ToCamelCase @@ -378,6 +389,8 @@ namespace Google.Protobuf /// private void WriteWellKnownTypeValue(StringBuilder builder, MessageDescriptor descriptor, object value, bool inField) { + // Currently, we can never actually get here, because null values are always handled by the caller. But if we *could*, + // this would do the right thing. if (value == null) { WriteNull(builder); @@ -429,6 +442,11 @@ namespace Google.Protobuf WriteStructFieldValue(builder, (IMessage) value); return; } + if (descriptor.FullName == Any.Descriptor.FullName) + { + WriteAny(builder, (IMessage) value); + return; + } WriteMessage(builder, (IMessage) value); } @@ -496,6 +514,46 @@ namespace Google.Protobuf AppendEscapedString(builder, string.Join(",", paths.Cast().Select(ToCamelCase))); } + private void WriteAny(StringBuilder builder, IMessage value) + { + string typeUrl = (string) value.Descriptor.Fields[Any.TypeUrlFieldNumber].Accessor.GetValue(value); + ByteString data = (ByteString) value.Descriptor.Fields[Any.ValueFieldNumber].Accessor.GetValue(value); + string typeName = GetTypeName(typeUrl); + MessageDescriptor descriptor = settings.TypeRegistry.Find(typeName); + if (descriptor == null) + { + throw new InvalidOperationException($"Type registry has no descriptor for type name '{typeName}'"); + } + IMessage message = descriptor.Parser.ParseFrom(data); + builder.Append("{ "); + WriteString(builder, AnyTypeUrlField); + builder.Append(NameValueSeparator); + WriteString(builder, typeUrl); + + if (descriptor.IsWellKnownType) + { + builder.Append(PropertySeparator); + WriteString(builder, AnyWellKnownTypeValueField); + builder.Append(NameValueSeparator); + WriteWellKnownTypeValue(builder, descriptor, message, true); + } + else + { + WriteMessageFields(builder, message, true); + } + builder.Append(" }"); + } + + internal static string GetTypeName(String typeUrl) + { + string[] parts = typeUrl.Split('/'); + if (parts.Length != 2 || parts[0] != TypeUrlPrefix) + { + throw new InvalidProtocolBufferException($"Invalid type url: {typeUrl}"); + } + return parts[1]; + } + /// /// Appends a number of nanoseconds to a StringBuilder. Either 0 digits are added (in which /// case no "." is appended), or 3 6 or 9 digits. @@ -537,10 +595,10 @@ namespace Google.Protobuf if (!first) { - builder.Append(", "); + builder.Append(PropertySeparator); } WriteString(builder, key); - builder.Append(": "); + builder.Append(NameValueSeparator); WriteStructFieldValue(builder, value); first = false; } @@ -590,7 +648,7 @@ namespace Google.Protobuf } if (!first) { - builder.Append(", "); + builder.Append(PropertySeparator); } WriteValue(builder, value); first = false; @@ -611,7 +669,7 @@ namespace Google.Protobuf } if (!first) { - builder.Append(", "); + builder.Append(PropertySeparator); } string keyText; if (pair.Key is string) @@ -635,7 +693,7 @@ namespace Google.Protobuf throw new ArgumentException("Unhandled dictionary key type: " + pair.Key.GetType()); } WriteString(builder, keyText); - builder.Append(": "); + builder.Append(NameValueSeparator); WriteValue(builder, pair.Value); first = false; } @@ -755,23 +813,40 @@ namespace Google.Protobuf /// /// Default settings, as used by /// - public static Settings Default { get { return defaultInstance; } } - - private readonly bool formatDefaultValues; + public static Settings Default { get; } = new Settings(false); /// /// Whether fields whose values are the default for the field type (e.g. 0 for integers) /// should be formatted (true) or omitted (false). /// - public bool FormatDefaultValues { get { return formatDefaultValues; } } + public bool FormatDefaultValues { get; } + + /// + /// The type registry used to format messages. + /// + public TypeRegistry TypeRegistry { get; } + + // TODO: Work out how we're going to scale this to multiple settings. "WithXyz" methods? + + /// + /// Creates a new object with the specified formatting of default values + /// and an empty type registry. + /// + /// true if default values (0, empty strings etc) should be formatted; false otherwise. + public Settings(bool formatDefaultValues) : this(formatDefaultValues, TypeRegistry.Empty) + { + } /// - /// Creates a new object with the specified formatting of default values. + /// Creates a new object with the specified formatting of default values + /// and type registry. /// /// true if default values (0, empty strings etc) should be formatted; false otherwise. - public Settings(bool formatDefaultValues) + /// The to use when formatting messages. + public Settings(bool formatDefaultValues, TypeRegistry typeRegistry) { - this.formatDefaultValues = formatDefaultValues; + FormatDefaultValues = formatDefaultValues; + TypeRegistry = Preconditions.CheckNotNull(typeRegistry, nameof(typeRegistry)); } } } From 3de2fced6be1cc5e8f321c5aee2bb43176be962a Mon Sep 17 00:00:00 2001 From: Jon Skeet Date: Mon, 23 Nov 2015 16:21:47 +0000 Subject: [PATCH 2/2] Handle JSON parsing for Any. This required a rework of the tokenizer to allow for a "replaying" tokenizer, basically in case the @type value comes after the data itself. This rework is nice in some ways (all the pushback and object depth logic in one place) but is a little fragile in terms of token push-back when using the replay tokenizer. It'll be fine for the scenario we need it for, but we should be careful... --- .../Google.Protobuf.Test/JsonParserTest.cs | 50 + .../Google.Protobuf.Test/JsonTokenizerTest.cs | 14 +- csharp/src/Google.Protobuf/JsonFormatter.cs | 11 +- csharp/src/Google.Protobuf/JsonParser.cs | 149 ++- csharp/src/Google.Protobuf/JsonTokenizer.cs | 1046 +++++++++-------- 5 files changed, 761 insertions(+), 509 deletions(-) diff --git a/csharp/src/Google.Protobuf.Test/JsonParserTest.cs b/csharp/src/Google.Protobuf.Test/JsonParserTest.cs index b3664770f2..874489e459 100644 --- a/csharp/src/Google.Protobuf.Test/JsonParserTest.cs +++ b/csharp/src/Google.Protobuf.Test/JsonParserTest.cs @@ -30,6 +30,7 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endregion +using Google.Protobuf.Reflection; using Google.Protobuf.TestProtos; using Google.Protobuf.WellKnownTypes; using NUnit.Framework; @@ -717,6 +718,55 @@ namespace Google.Protobuf CollectionAssert.AreEqual(expectedPaths, parsed.Paths); } + [Test] + public void Any_RegularMessage() + { + var registry = TypeRegistry.FromMessages(TestAllTypes.Descriptor); + var formatter = new JsonFormatter(new JsonFormatter.Settings(false, TypeRegistry.FromMessages(TestAllTypes.Descriptor))); + var message = new TestAllTypes { SingleInt32 = 10, SingleNestedMessage = new TestAllTypes.Types.NestedMessage { Bb = 20 } }; + var original = Any.Pack(message); + var json = formatter.Format(original); // This is tested in JsonFormatterTest + var parser = new JsonParser(new JsonParser.Settings(10, registry)); + Assert.AreEqual(original, parser.Parse(json)); + string valueFirstJson = "{ \"singleInt32\": 10, \"singleNestedMessage\": { \"bb\": 20 }, \"@type\": \"type.googleapis.com/protobuf_unittest.TestAllTypes\" }"; + Assert.AreEqual(original, parser.Parse(valueFirstJson)); + } + + [Test] + public void Any_UnknownType() + { + string json = "{ \"@type\": \"type.googleapis.com/bogus\" }"; + Assert.Throws(() => Any.Parser.ParseJson(json)); + } + + [Test] + public void Any_WellKnownType() + { + var registry = TypeRegistry.FromMessages(Timestamp.Descriptor); + var formatter = new JsonFormatter(new JsonFormatter.Settings(false, registry)); + var timestamp = new DateTime(1673, 6, 19, 12, 34, 56, DateTimeKind.Utc).ToTimestamp(); + var original = Any.Pack(timestamp); + var json = formatter.Format(original); // This is tested in JsonFormatterTest + var parser = new JsonParser(new JsonParser.Settings(10, registry)); + Assert.AreEqual(original, parser.Parse(json)); + string valueFirstJson = "{ \"value\": \"1673-06-19T12:34:56Z\", \"@type\": \"type.googleapis.com/google.protobuf.Timestamp\" }"; + Assert.AreEqual(original, parser.Parse(valueFirstJson)); + } + + [Test] + public void Any_Nested() + { + var registry = TypeRegistry.FromMessages(TestWellKnownTypes.Descriptor, TestAllTypes.Descriptor); + var formatter = new JsonFormatter(new JsonFormatter.Settings(false, registry)); + var parser = new JsonParser(new JsonParser.Settings(10, registry)); + var doubleNestedMessage = new TestAllTypes { SingleInt32 = 20 }; + var nestedMessage = Any.Pack(doubleNestedMessage); + var message = new TestWellKnownTypes { AnyField = Any.Pack(nestedMessage) }; + var json = formatter.Format(message); + // Use the descriptor-based parser just for a change. + Assert.AreEqual(message, parser.Parse(json, TestWellKnownTypes.Descriptor)); + } + [Test] public void DataAfterObject() { diff --git a/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs b/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs index a38efeedd4..a0a6222768 100644 --- a/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs +++ b/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs @@ -85,7 +85,7 @@ namespace Google.Protobuf public void ObjectDepth() { string json = "{ \"foo\": { \"x\": 1, \"y\": [ 0 ] } }"; - var tokenizer = new JsonTokenizer(new StringReader(json)); + var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json)); // If we had more tests like this, I'd introduce a helper method... but for one test, it's not worth it. Assert.AreEqual(0, tokenizer.ObjectDepth); Assert.AreEqual(JsonToken.StartObject, tokenizer.Next()); @@ -118,7 +118,7 @@ namespace Google.Protobuf public void ObjectDepth_WithPushBack() { string json = "{}"; - var tokenizer = new JsonTokenizer(new StringReader(json)); + var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json)); Assert.AreEqual(0, tokenizer.ObjectDepth); var token = tokenizer.Next(); Assert.AreEqual(1, tokenizer.ObjectDepth); @@ -275,7 +275,7 @@ namespace Google.Protobuf // Note: we don't test that the earlier tokens are exactly as expected, // partly because that's hard to parameterize. var reader = new StringReader(json.Replace('\'', '"')); - var tokenizer = new JsonTokenizer(reader); + var tokenizer = JsonTokenizer.FromTextReader(reader); for (int i = 0; i < expectedValidTokens; i++) { Assert.IsNotNull(tokenizer.Next()); @@ -334,7 +334,7 @@ namespace Google.Protobuf [Test] public void NextAfterEndDocumentThrows() { - var tokenizer = new JsonTokenizer(new StringReader("null")); + var tokenizer = JsonTokenizer.FromTextReader(new StringReader("null")); Assert.AreEqual(JsonToken.Null, tokenizer.Next()); Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next()); Assert.Throws(() => tokenizer.Next()); @@ -343,7 +343,7 @@ namespace Google.Protobuf [Test] public void CanPushBackEndDocument() { - var tokenizer = new JsonTokenizer(new StringReader("null")); + var tokenizer = JsonTokenizer.FromTextReader(new StringReader("null")); Assert.AreEqual(JsonToken.Null, tokenizer.Next()); Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next()); tokenizer.PushBack(JsonToken.EndDocument); @@ -373,7 +373,7 @@ namespace Google.Protobuf private static void AssertTokensNoReplacement(string json, params JsonToken[] expectedTokens) { var reader = new StringReader(json); - var tokenizer = new JsonTokenizer(reader); + var tokenizer = JsonTokenizer.FromTextReader(reader); for (int i = 0; i < expectedTokens.Length; i++) { var actualToken = tokenizer.Next(); @@ -393,7 +393,7 @@ namespace Google.Protobuf private static void AssertThrowsAfter(string json, params JsonToken[] expectedTokens) { var reader = new StringReader(json); - var tokenizer = new JsonTokenizer(reader); + var tokenizer = JsonTokenizer.FromTextReader(reader); for (int i = 0; i < expectedTokens.Length; i++) { var actualToken = tokenizer.Next(); diff --git a/csharp/src/Google.Protobuf/JsonFormatter.cs b/csharp/src/Google.Protobuf/JsonFormatter.cs index c7d392cd51..45941b3919 100644 --- a/csharp/src/Google.Protobuf/JsonFormatter.cs +++ b/csharp/src/Google.Protobuf/JsonFormatter.cs @@ -808,12 +808,17 @@ namespace Google.Protobuf /// public sealed class Settings { - private static readonly Settings defaultInstance = new Settings(false); - /// /// Default settings, as used by /// - public static Settings Default { get; } = new Settings(false); + public static Settings Default { get; } + + // Workaround for the Mono compiler complaining about XML comments not being on + // valid language elements. + static Settings() + { + Default = new Settings(false); + } /// /// Whether fields whose values are the default for the field type (e.g. 0 for integers) diff --git a/csharp/src/Google.Protobuf/JsonParser.cs b/csharp/src/Google.Protobuf/JsonParser.cs index 2019029b6f..95f9ad351e 100644 --- a/csharp/src/Google.Protobuf/JsonParser.cs +++ b/csharp/src/Google.Protobuf/JsonParser.cs @@ -77,6 +77,7 @@ namespace Google.Protobuf { ListValue.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeRepeatedField(message, message.Descriptor.Fields[ListValue.ValuesFieldNumber], tokenizer) }, { Struct.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStruct(message, tokenizer) }, + { Any.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeAny(message, tokenizer) }, { FieldMask.Descriptor.FullName, (parser, message, tokenizer) => MergeFieldMask(message, tokenizer.Next()) }, { Int32Value.Descriptor.FullName, MergeWrapperField }, { Int64Value.Descriptor.FullName, MergeWrapperField }, @@ -128,7 +129,7 @@ namespace Google.Protobuf /// Reader providing the JSON to parse. internal void Merge(IMessage message, TextReader jsonReader) { - var tokenizer = new JsonTokenizer(jsonReader); + var tokenizer = JsonTokenizer.FromTextReader(jsonReader); Merge(message, tokenizer); var lastToken = tokenizer.Next(); if (lastToken != JsonToken.EndDocument) @@ -338,6 +339,7 @@ namespace Google.Protobuf /// The JSON does not represent a Protocol Buffers message correctly public T Parse(string json) where T : IMessage, new() { + Preconditions.CheckNotNull(json, nameof(json)); return Parse(new StringReader(json)); } @@ -350,11 +352,42 @@ namespace Google.Protobuf /// The JSON does not represent a Protocol Buffers message correctly public T Parse(TextReader jsonReader) where T : IMessage, new() { + Preconditions.CheckNotNull(jsonReader, nameof(jsonReader)); T message = new T(); Merge(message, jsonReader); return message; } + /// + /// Parses into a new message. + /// + /// The JSON to parse. + /// Descriptor of message type to parse. + /// The JSON does not comply with RFC 7159 + /// The JSON does not represent a Protocol Buffers message correctly + public IMessage Parse(string json, MessageDescriptor descriptor) + { + Preconditions.CheckNotNull(json, nameof(json)); + Preconditions.CheckNotNull(descriptor, nameof(descriptor)); + return Parse(new StringReader(json), descriptor); + } + + /// + /// Parses JSON read from into a new message. + /// + /// Reader providing the JSON to parse. + /// Descriptor of message type to parse. + /// The JSON does not comply with RFC 7159 + /// The JSON does not represent a Protocol Buffers message correctly + public IMessage Parse(TextReader jsonReader, MessageDescriptor descriptor) + { + Preconditions.CheckNotNull(jsonReader, nameof(jsonReader)); + Preconditions.CheckNotNull(descriptor, nameof(descriptor)); + IMessage message = descriptor.Parser.CreateTemplate(); + Merge(message, jsonReader); + return message; + } + private void MergeStructValue(IMessage message, JsonTokenizer tokenizer) { var firstToken = tokenizer.Next(); @@ -410,6 +443,83 @@ namespace Google.Protobuf MergeMapField(message, field, tokenizer); } + private void MergeAny(IMessage message, JsonTokenizer tokenizer) + { + // Record the token stream until we see the @type property. At that point, we can take the value, consult + // the type registry for the relevant message, and replay the stream, omitting the @type property. + var tokens = new List(); + + var token = tokenizer.Next(); + if (token.Type != JsonToken.TokenType.StartObject) + { + throw new InvalidProtocolBufferException("Expected object value for Any"); + } + int typeUrlObjectDepth = tokenizer.ObjectDepth; + + // The check for the property depth protects us from nested Any values which occur before the type URL + // for *this* Any. + while (token.Type != JsonToken.TokenType.Name || + token.StringValue != JsonFormatter.AnyTypeUrlField || + tokenizer.ObjectDepth != typeUrlObjectDepth) + { + tokens.Add(token); + token = tokenizer.Next(); + } + + // Don't add the @type property or its value to the recorded token list + token = tokenizer.Next(); + if (token.Type != JsonToken.TokenType.StringValue) + { + throw new InvalidProtocolBufferException("Expected string value for Any.@type"); + } + string typeUrl = token.StringValue; + string typeName = JsonFormatter.GetTypeName(typeUrl); + + MessageDescriptor descriptor = settings.TypeRegistry.Find(typeName); + if (descriptor == null) + { + throw new InvalidOperationException($"Type registry has no descriptor for type name '{typeName}'"); + } + + // Now replay the token stream we've already read and anything that remains of the object, just parsing it + // as normal. Our original tokenizer should end up at the end of the object. + var replay = JsonTokenizer.FromReplayedTokens(tokens, tokenizer); + var body = descriptor.Parser.CreateTemplate(); + if (descriptor.IsWellKnownType) + { + MergeWellKnownTypeAnyBody(body, replay); + } + else + { + Merge(body, replay); + } + var data = body.ToByteString(); + + // Now that we have the message data, we can pack it into an Any (the message received as a parameter). + message.Descriptor.Fields[Any.TypeUrlFieldNumber].Accessor.SetValue(message, typeUrl); + message.Descriptor.Fields[Any.ValueFieldNumber].Accessor.SetValue(message, data); + } + + // Well-known types end up in a property called "value" in the JSON. As there's no longer a @type property + // in the given JSON token stream, we should *only* have tokens of start-object, name("value"), the value + // itself, and then end-object. + private void MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer) + { + var token = tokenizer.Next(); // Definitely start-object; checked in previous method + token = tokenizer.Next(); + // TODO: What about an absent Int32Value, for example? + if (token.Type != JsonToken.TokenType.Name || token.StringValue != JsonFormatter.AnyWellKnownTypeValueField) + { + throw new InvalidProtocolBufferException($"Expected '{JsonFormatter.AnyWellKnownTypeValueField}' property for well-known type Any body"); + } + Merge(body, tokenizer); + token = tokenizer.Next(); + if (token.Type != JsonToken.TokenType.EndObject) + { + throw new InvalidProtocolBufferException($"Expected end-object token after @type/value for well-known type"); + } + } + #region Utility methods which don't depend on the state (or settings) of the parser. private static object ParseMapKey(FieldDescriptor field, string keyText) { @@ -789,29 +899,48 @@ namespace Google.Protobuf /// public sealed class Settings { - private static readonly Settings defaultInstance = new Settings(CodedInputStream.DefaultRecursionLimit); - - private readonly int recursionLimit; - /// - /// Default settings, as used by + /// Default settings, as used by . This has the same default + /// recursion limit as , and an empty type registry. /// - public static Settings Default { get { return defaultInstance; } } + public static Settings Default { get; } + + // Workaround for the Mono compiler complaining about XML comments not being on + // valid language elements. + static Settings() + { + Default = new Settings(CodedInputStream.DefaultRecursionLimit); + } /// /// The maximum depth of messages to parse. Note that this limit only applies to parsing /// messages, not collections - so a message within a collection within a message only counts as /// depth 2, not 3. /// - public int RecursionLimit { get { return recursionLimit; } } + public int RecursionLimit { get; } + + /// + /// The type registry used to parse messages. + /// + public TypeRegistry TypeRegistry { get; } /// /// Creates a new object with the specified recursion limit. /// /// The maximum depth of messages to parse - public Settings(int recursionLimit) + public Settings(int recursionLimit) : this(recursionLimit, TypeRegistry.Empty) + { + } + + /// + /// Creates a new object with the specified recursion limit and type registry. + /// + /// The maximum depth of messages to parse + /// The type registry used to parse messages + public Settings(int recursionLimit, TypeRegistry typeRegistry) { - this.recursionLimit = recursionLimit; + RecursionLimit = recursionLimit; + TypeRegistry = Preconditions.CheckNotNull(typeRegistry, nameof(typeRegistry)); } } } diff --git a/csharp/src/Google.Protobuf/JsonTokenizer.cs b/csharp/src/Google.Protobuf/JsonTokenizer.cs index 6589427a4d..09a6d43b7b 100644 --- a/csharp/src/Google.Protobuf/JsonTokenizer.cs +++ b/csharp/src/Google.Protobuf/JsonTokenizer.cs @@ -47,32 +47,38 @@ namespace Google.Protobuf /// between values. It validates the token stream as it goes - so callers can assume that the /// tokens it produces are appropriate. For example, it would never produce "start object, end array." /// + /// Implementation details: the base class handles single token push-back and /// Not thread-safe. /// - internal sealed class JsonTokenizer + internal abstract class JsonTokenizer { - // The set of states in which a value is valid next token. - private static readonly State ValueStates = State.ArrayStart | State.ArrayAfterComma | State.ObjectAfterColon | State.StartOfDocument; - - private readonly Stack containerStack = new Stack(); - private readonly PushBackReader reader; private JsonToken bufferedToken; - private State state; - private int objectDepth = 0; /// - /// Returns the depth of the stack, purely in objects (not collections). - /// Informally, this is the number of remaining unclosed '{' characters we have. + /// Creates a tokenizer that reads from the given text reader. /// - internal int ObjectDepth { get { return objectDepth; } } + internal static JsonTokenizer FromTextReader(TextReader reader) + { + return new JsonTextTokenizer(reader); + } - internal JsonTokenizer(TextReader reader) + /// + /// Creates a tokenizer that first replays the given list of tokens, then continues reading + /// from another tokenizer. Note that if the returned tokenizer is "pushed back", that does not push back + /// on the continuation tokenizer, or vice versa. Care should be taken when using this method - it was + /// created for the sake of Any parsing. + /// + internal static JsonTokenizer FromReplayedTokens(IList tokens, JsonTokenizer continuation) { - this.reader = new PushBackReader(reader); - state = State.StartOfDocument; - containerStack.Push(ContainerType.Document); + return new JsonReplayTokenizer(tokens, continuation); } + /// + /// Returns the depth of the stack, purely in objects (not collections). + /// Informally, this is the number of remaining unclosed '{' characters we have. + /// + internal int ObjectDepth { get; private set; } + // TODO: Why do we allow a different token to be pushed back? It might be better to always remember the previous // token returned, and allow a parameterless Rewind() method (which could only be called once, just like the current PushBack). internal void PushBack(JsonToken token) @@ -84,11 +90,11 @@ namespace Google.Protobuf bufferedToken = token; if (token.Type == JsonToken.TokenType.StartObject) { - objectDepth--; + ObjectDepth--; } else if (token.Type == JsonToken.TokenType.EndObject) { - objectDepth++; + ObjectDepth++; } } @@ -96,574 +102,636 @@ namespace Google.Protobuf /// Returns the next JSON token in the stream. An EndDocument token is returned to indicate the end of the stream, /// after which point Next() should not be called again. /// - /// - /// This method essentially just loops through characters skipping whitespace, validating and - /// changing state (e.g. from ObjectBeforeColon to ObjectAfterColon) - /// until it reaches something which will be a genuine token (e.g. a start object, or a value) at which point - /// it returns the token. Although the method is large, it would be relatively hard to break down further... most - /// of it is the large switch statement, which sometimes returns and sometimes doesn't. - /// + /// This implementation provides single-token buffering, and calls if there is no buffered token. /// The next token in the stream. This is never null. /// This method is called after an EndDocument token has been returned /// The input text does not comply with RFC 7159 internal JsonToken Next() { + JsonToken tokenToReturn; if (bufferedToken != null) { - var ret = bufferedToken; + tokenToReturn = bufferedToken; bufferedToken = null; - if (ret.Type == JsonToken.TokenType.StartObject) - { - objectDepth++; - } - else if (ret.Type == JsonToken.TokenType.EndObject) - { - objectDepth--; - } - return ret; } - if (state == State.ReaderExhausted) + else { - throw new InvalidOperationException("Next() called after end of document"); + tokenToReturn = NextImpl(); } - while (true) + if (tokenToReturn.Type == JsonToken.TokenType.StartObject) { - var next = reader.Read(); - if (next == null) - { - ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: "); - state = State.ReaderExhausted; - return JsonToken.EndDocument; - } - switch (next.Value) - { - // Skip whitespace between tokens - case ' ': - case '\t': - case '\r': - case '\n': - break; - case ':': - ValidateState(State.ObjectBeforeColon, "Invalid state to read a colon: "); - state = State.ObjectAfterColon; - break; - case ',': - ValidateState(State.ObjectAfterProperty | State.ArrayAfterValue, "Invalid state to read a colon: "); - state = state == State.ObjectAfterProperty ? State.ObjectAfterComma : State.ArrayAfterComma; - break; - case '"': - string stringValue = ReadString(); - if ((state & (State.ObjectStart | State.ObjectAfterComma)) != 0) - { - state = State.ObjectBeforeColon; - return JsonToken.Name(stringValue); - } - else - { - ValidateAndModifyStateForValue("Invalid state to read a double quote: "); - return JsonToken.Value(stringValue); - } - case '{': - ValidateState(ValueStates, "Invalid state to read an open brace: "); - state = State.ObjectStart; - containerStack.Push(ContainerType.Object); - objectDepth++; - return JsonToken.StartObject; - case '}': - ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: "); - PopContainer(); - objectDepth--; - return JsonToken.EndObject; - case '[': - ValidateState(ValueStates, "Invalid state to read an open square bracket: "); - state = State.ArrayStart; - containerStack.Push(ContainerType.Array); - return JsonToken.StartArray; - case ']': - ValidateState(State.ArrayAfterValue | State.ArrayStart, "Invalid state to read a close square bracket: "); - PopContainer(); - return JsonToken.EndArray; - case 'n': // Start of null - ConsumeLiteral("null"); - ValidateAndModifyStateForValue("Invalid state to read a null literal: "); - return JsonToken.Null; - case 't': // Start of true - ConsumeLiteral("true"); - ValidateAndModifyStateForValue("Invalid state to read a true literal: "); - return JsonToken.True; - case 'f': // Start of false - ConsumeLiteral("false"); - ValidateAndModifyStateForValue("Invalid state to read a false literal: "); - return JsonToken.False; - case '-': // Start of a number - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - double number = ReadNumber(next.Value); - ValidateAndModifyStateForValue("Invalid state to read a number token: "); - return JsonToken.Value(number); - default: - throw new InvalidJsonException("Invalid first character of token: " + next.Value); - } + ObjectDepth++; } - } - - private void ValidateState(State validStates, string errorPrefix) - { - if ((validStates & state) == 0) + else if (tokenToReturn.Type == JsonToken.TokenType.EndObject) { - throw reader.CreateException(errorPrefix + state); + ObjectDepth--; } + return tokenToReturn; } /// - /// Reads a string token. It is assumed that the opening " has already been read. + /// Returns the next JSON token in the stream, when requested by the base class. (The method delegates + /// to this if it doesn't have a buffered token.) /// - private string ReadString() - { - var value = new StringBuilder(); - bool haveHighSurrogate = false; - while (true) - { - char c = reader.ReadOrFail("Unexpected end of text while reading string"); - if (c < ' ') - { - throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in string literal: U+{0:x4}", (int) c)); - } - if (c == '"') - { - if (haveHighSurrogate) - { - throw reader.CreateException("Invalid use of surrogate pair code units"); - } - return value.ToString(); - } - if (c == '\\') - { - c = ReadEscapedCharacter(); - } - // TODO: Consider only allowing surrogate pairs that are either both escaped, - // or both not escaped. It would be a very odd text stream that contained a "lone" high surrogate - // followed by an escaped low surrogate or vice versa... and that couldn't even be represented in UTF-8. - if (haveHighSurrogate != char.IsLowSurrogate(c)) - { - throw reader.CreateException("Invalid use of surrogate pair code units"); - } - haveHighSurrogate = char.IsHighSurrogate(c); - value.Append(c); - } - } + /// This method is called after an EndDocument token has been returned + /// The input text does not comply with RFC 7159 + protected abstract JsonToken NextImpl(); /// - /// Reads an escaped character. It is assumed that the leading backslash has already been read. + /// Tokenizer which first exhausts a list of tokens, then consults another tokenizer. /// - private char ReadEscapedCharacter() + private class JsonReplayTokenizer : JsonTokenizer { - char c = reader.ReadOrFail("Unexpected end of text while reading character escape sequence"); - switch (c) + private readonly IList tokens; + private readonly JsonTokenizer nextTokenizer; + private int nextTokenIndex; + + internal JsonReplayTokenizer(IList tokens, JsonTokenizer nextTokenizer) { - case 'n': - return '\n'; - case '\\': - return '\\'; - case 'b': - return '\b'; - case 'f': - return '\f'; - case 'r': - return '\r'; - case 't': - return '\t'; - case '"': - return '"'; - case '/': - return '/'; - case 'u': - return ReadUnicodeEscape(); - default: - throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) c)); + this.tokens = tokens; + this.nextTokenizer = nextTokenizer; } - } - /// - /// Reads an escaped Unicode 4-nybble hex sequence. It is assumed that the leading \u has already been read. - /// - private char ReadUnicodeEscape() - { - int result = 0; - for (int i = 0; i < 4; i++) + // FIXME: Object depth not maintained... + protected override JsonToken NextImpl() { - char c = reader.ReadOrFail("Unexpected end of text while reading Unicode escape sequence"); - int nybble; - if (c >= '0' && c <= '9') + if (nextTokenIndex >= tokens.Count) { - nybble = c - '0'; + return nextTokenizer.Next(); } - else if (c >= 'a' && c <= 'f') - { - nybble = c - 'a' + 10; - } - else if (c >= 'A' && c <= 'F') - { - nybble = c - 'A' + 10; - } - else - { - throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) c)); - } - result = (result << 4) + nybble; + return tokens[nextTokenIndex++]; } - return (char) result; } /// - /// Consumes a text-only literal, throwing an exception if the read text doesn't match it. - /// It is assumed that the first letter of the literal has already been read. + /// Tokenizer which does all the *real* work of parsing JSON. /// - private void ConsumeLiteral(string text) + private sealed class JsonTextTokenizer : JsonTokenizer { - for (int i = 1; i < text.Length; i++) + // The set of states in which a value is valid next token. + private static readonly State ValueStates = State.ArrayStart | State.ArrayAfterComma | State.ObjectAfterColon | State.StartOfDocument; + + private readonly Stack containerStack = new Stack(); + private readonly PushBackReader reader; + private State state; + + internal JsonTextTokenizer(TextReader reader) { - char? next = reader.Read(); - if (next == null) + this.reader = new PushBackReader(reader); + state = State.StartOfDocument; + containerStack.Push(ContainerType.Document); + } + + /// + /// This method essentially just loops through characters skipping whitespace, validating and + /// changing state (e.g. from ObjectBeforeColon to ObjectAfterColon) + /// until it reaches something which will be a genuine token (e.g. a start object, or a value) at which point + /// it returns the token. Although the method is large, it would be relatively hard to break down further... most + /// of it is the large switch statement, which sometimes returns and sometimes doesn't. + /// + protected override JsonToken NextImpl() + { + if (state == State.ReaderExhausted) { - throw reader.CreateException("Unexpected end of text while reading literal token " + text); + throw new InvalidOperationException("Next() called after end of document"); } - if (next.Value != text[i]) + while (true) { - throw reader.CreateException("Unexpected character while reading literal token " + text); + var next = reader.Read(); + if (next == null) + { + ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: "); + state = State.ReaderExhausted; + return JsonToken.EndDocument; + } + switch (next.Value) + { + // Skip whitespace between tokens + case ' ': + case '\t': + case '\r': + case '\n': + break; + case ':': + ValidateState(State.ObjectBeforeColon, "Invalid state to read a colon: "); + state = State.ObjectAfterColon; + break; + case ',': + ValidateState(State.ObjectAfterProperty | State.ArrayAfterValue, "Invalid state to read a colon: "); + state = state == State.ObjectAfterProperty ? State.ObjectAfterComma : State.ArrayAfterComma; + break; + case '"': + string stringValue = ReadString(); + if ((state & (State.ObjectStart | State.ObjectAfterComma)) != 0) + { + state = State.ObjectBeforeColon; + return JsonToken.Name(stringValue); + } + else + { + ValidateAndModifyStateForValue("Invalid state to read a double quote: "); + return JsonToken.Value(stringValue); + } + case '{': + ValidateState(ValueStates, "Invalid state to read an open brace: "); + state = State.ObjectStart; + containerStack.Push(ContainerType.Object); + return JsonToken.StartObject; + case '}': + ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: "); + PopContainer(); + return JsonToken.EndObject; + case '[': + ValidateState(ValueStates, "Invalid state to read an open square bracket: "); + state = State.ArrayStart; + containerStack.Push(ContainerType.Array); + return JsonToken.StartArray; + case ']': + ValidateState(State.ArrayAfterValue | State.ArrayStart, "Invalid state to read a close square bracket: "); + PopContainer(); + return JsonToken.EndArray; + case 'n': // Start of null + ConsumeLiteral("null"); + ValidateAndModifyStateForValue("Invalid state to read a null literal: "); + return JsonToken.Null; + case 't': // Start of true + ConsumeLiteral("true"); + ValidateAndModifyStateForValue("Invalid state to read a true literal: "); + return JsonToken.True; + case 'f': // Start of false + ConsumeLiteral("false"); + ValidateAndModifyStateForValue("Invalid state to read a false literal: "); + return JsonToken.False; + case '-': // Start of a number + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + double number = ReadNumber(next.Value); + ValidateAndModifyStateForValue("Invalid state to read a number token: "); + return JsonToken.Value(number); + default: + throw new InvalidJsonException("Invalid first character of token: " + next.Value); + } } } - } - private double ReadNumber(char initialCharacter) - { - StringBuilder builder = new StringBuilder(); - if (initialCharacter == '-') - { - builder.Append("-"); - } - else - { - reader.PushBack(initialCharacter); - } - // Each method returns the character it read that doesn't belong in that part, - // so we know what to do next, including pushing the character back at the end. - // null is returned for "end of text". - char? next = ReadInt(builder); - if (next == '.') + private void ValidateState(State validStates, string errorPrefix) { - next = ReadFrac(builder); - } - if (next == 'e' || next == 'E') - { - next = ReadExp(builder); - } - // If we read a character which wasn't part of the number, push it back so we can read it again - // to parse the next token. - if (next != null) - { - reader.PushBack(next.Value); + if ((validStates & state) == 0) + { + throw reader.CreateException(errorPrefix + state); + } } - // TODO: What exception should we throw if the value can't be represented as a double? - try - { - return double.Parse(builder.ToString(), - NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, - CultureInfo.InvariantCulture); - } - catch (OverflowException) + /// + /// Reads a string token. It is assumed that the opening " has already been read. + /// + private string ReadString() { - throw reader.CreateException("Numeric value out of range: " + builder); + var value = new StringBuilder(); + bool haveHighSurrogate = false; + while (true) + { + char c = reader.ReadOrFail("Unexpected end of text while reading string"); + if (c < ' ') + { + throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in string literal: U+{0:x4}", (int) c)); + } + if (c == '"') + { + if (haveHighSurrogate) + { + throw reader.CreateException("Invalid use of surrogate pair code units"); + } + return value.ToString(); + } + if (c == '\\') + { + c = ReadEscapedCharacter(); + } + // TODO: Consider only allowing surrogate pairs that are either both escaped, + // or both not escaped. It would be a very odd text stream that contained a "lone" high surrogate + // followed by an escaped low surrogate or vice versa... and that couldn't even be represented in UTF-8. + if (haveHighSurrogate != char.IsLowSurrogate(c)) + { + throw reader.CreateException("Invalid use of surrogate pair code units"); + } + haveHighSurrogate = char.IsHighSurrogate(c); + value.Append(c); + } } - } - private char? ReadInt(StringBuilder builder) - { - char first = reader.ReadOrFail("Invalid numeric literal"); - if (first < '0' || first > '9') - { - throw reader.CreateException("Invalid numeric literal"); - } - builder.Append(first); - int digitCount; - char? next = ConsumeDigits(builder, out digitCount); - if (first == '0' && digitCount != 0) + /// + /// Reads an escaped character. It is assumed that the leading backslash has already been read. + /// + private char ReadEscapedCharacter() { - throw reader.CreateException("Invalid numeric literal: leading 0 for non-zero value."); + char c = reader.ReadOrFail("Unexpected end of text while reading character escape sequence"); + switch (c) + { + case 'n': + return '\n'; + case '\\': + return '\\'; + case 'b': + return '\b'; + case 'f': + return '\f'; + case 'r': + return '\r'; + case 't': + return '\t'; + case '"': + return '"'; + case '/': + return '/'; + case 'u': + return ReadUnicodeEscape(); + default: + throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) c)); + } } - return next; - } - private char? ReadFrac(StringBuilder builder) - { - builder.Append('.'); // Already consumed this - int digitCount; - char? next = ConsumeDigits(builder, out digitCount); - if (digitCount == 0) + /// + /// Reads an escaped Unicode 4-nybble hex sequence. It is assumed that the leading \u has already been read. + /// + private char ReadUnicodeEscape() { - throw reader.CreateException("Invalid numeric literal: fraction with no trailing digits"); + int result = 0; + for (int i = 0; i < 4; i++) + { + char c = reader.ReadOrFail("Unexpected end of text while reading Unicode escape sequence"); + int nybble; + if (c >= '0' && c <= '9') + { + nybble = c - '0'; + } + else if (c >= 'a' && c <= 'f') + { + nybble = c - 'a' + 10; + } + else if (c >= 'A' && c <= 'F') + { + nybble = c - 'A' + 10; + } + else + { + throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) c)); + } + result = (result << 4) + nybble; + } + return (char) result; } - return next; - } - private char? ReadExp(StringBuilder builder) - { - builder.Append('E'); // Already consumed this (or 'e') - char? next = reader.Read(); - if (next == null) + /// + /// Consumes a text-only literal, throwing an exception if the read text doesn't match it. + /// It is assumed that the first letter of the literal has already been read. + /// + private void ConsumeLiteral(string text) { - throw reader.CreateException("Invalid numeric literal: exponent with no trailing digits"); + for (int i = 1; i < text.Length; i++) + { + char? next = reader.Read(); + if (next == null) + { + throw reader.CreateException("Unexpected end of text while reading literal token " + text); + } + if (next.Value != text[i]) + { + throw reader.CreateException("Unexpected character while reading literal token " + text); + } + } } - if (next == '-' || next == '+') + + private double ReadNumber(char initialCharacter) { - builder.Append(next.Value); + StringBuilder builder = new StringBuilder(); + if (initialCharacter == '-') + { + builder.Append("-"); + } + else + { + reader.PushBack(initialCharacter); + } + // Each method returns the character it read that doesn't belong in that part, + // so we know what to do next, including pushing the character back at the end. + // null is returned for "end of text". + char? next = ReadInt(builder); + if (next == '.') + { + next = ReadFrac(builder); + } + if (next == 'e' || next == 'E') + { + next = ReadExp(builder); + } + // If we read a character which wasn't part of the number, push it back so we can read it again + // to parse the next token. + if (next != null) + { + reader.PushBack(next.Value); + } + + // TODO: What exception should we throw if the value can't be represented as a double? + try + { + return double.Parse(builder.ToString(), + NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, + CultureInfo.InvariantCulture); + } + catch (OverflowException) + { + throw reader.CreateException("Numeric value out of range: " + builder); + } } - else + + private char? ReadInt(StringBuilder builder) { - reader.PushBack(next.Value); + char first = reader.ReadOrFail("Invalid numeric literal"); + if (first < '0' || first > '9') + { + throw reader.CreateException("Invalid numeric literal"); + } + builder.Append(first); + int digitCount; + char? next = ConsumeDigits(builder, out digitCount); + if (first == '0' && digitCount != 0) + { + throw reader.CreateException("Invalid numeric literal: leading 0 for non-zero value."); + } + return next; } - int digitCount; - next = ConsumeDigits(builder, out digitCount); - if (digitCount == 0) + + private char? ReadFrac(StringBuilder builder) { - throw reader.CreateException("Invalid numeric literal: exponent without value"); + builder.Append('.'); // Already consumed this + int digitCount; + char? next = ConsumeDigits(builder, out digitCount); + if (digitCount == 0) + { + throw reader.CreateException("Invalid numeric literal: fraction with no trailing digits"); + } + return next; } - return next; - } - private char? ConsumeDigits(StringBuilder builder, out int count) - { - count = 0; - while (true) + private char? ReadExp(StringBuilder builder) { + builder.Append('E'); // Already consumed this (or 'e') char? next = reader.Read(); - if (next == null || next.Value < '0' || next.Value > '9') + if (next == null) + { + throw reader.CreateException("Invalid numeric literal: exponent with no trailing digits"); + } + if (next == '-' || next == '+') { - return next; + builder.Append(next.Value); } - count++; - builder.Append(next.Value); + else + { + reader.PushBack(next.Value); + } + int digitCount; + next = ConsumeDigits(builder, out digitCount); + if (digitCount == 0) + { + throw reader.CreateException("Invalid numeric literal: exponent without value"); + } + return next; } - } - /// - /// Validates that we're in a valid state to read a value (using the given error prefix if necessary) - /// and changes the state to the appropriate one, e.g. ObjectAfterColon to ObjectAfterProperty. - /// - private void ValidateAndModifyStateForValue(string errorPrefix) - { - ValidateState(ValueStates, errorPrefix); - switch (state) + private char? ConsumeDigits(StringBuilder builder, out int count) { - case State.StartOfDocument: - state = State.ExpectedEndOfDocument; - return; - case State.ObjectAfterColon: - state = State.ObjectAfterProperty; - return; - case State.ArrayStart: - case State.ArrayAfterComma: - state = State.ArrayAfterValue; - return; - default: - throw new InvalidOperationException("ValidateAndModifyStateForValue does not handle all value states (and should)"); + count = 0; + while (true) + { + char? next = reader.Read(); + if (next == null || next.Value < '0' || next.Value > '9') + { + return next; + } + count++; + builder.Append(next.Value); + } } - } - /// - /// Pops the top-most container, and sets the state to the appropriate one for the end of a value - /// in the parent container. - /// - private void PopContainer() - { - containerStack.Pop(); - var parent = containerStack.Peek(); - switch (parent) + /// + /// Validates that we're in a valid state to read a value (using the given error prefix if necessary) + /// and changes the state to the appropriate one, e.g. ObjectAfterColon to ObjectAfterProperty. + /// + private void ValidateAndModifyStateForValue(string errorPrefix) { - case ContainerType.Object: - state = State.ObjectAfterProperty; - break; - case ContainerType.Array: - state = State.ArrayAfterValue; - break; - case ContainerType.Document: - state = State.ExpectedEndOfDocument; - break; - default: - throw new InvalidOperationException("Unexpected container type: " + parent); + ValidateState(ValueStates, errorPrefix); + switch (state) + { + case State.StartOfDocument: + state = State.ExpectedEndOfDocument; + return; + case State.ObjectAfterColon: + state = State.ObjectAfterProperty; + return; + case State.ArrayStart: + case State.ArrayAfterComma: + state = State.ArrayAfterValue; + return; + default: + throw new InvalidOperationException("ValidateAndModifyStateForValue does not handle all value states (and should)"); + } } - } - private enum ContainerType - { - Document, Object, Array - } - - /// - /// Possible states of the tokenizer. - /// - /// - /// This is a flags enum purely so we can simply and efficiently represent a set of valid states - /// for checking. - /// - /// Each is documented with an example, - /// where ^ represents the current position within the text stream. The examples all use string values, - /// but could be any value, including nested objects/arrays. - /// The complete state of the tokenizer also includes a stack to indicate the contexts (arrays/objects). - /// Any additional notional state of "AfterValue" indicates that a value has been completed, at which - /// point there's an immediate transition to ExpectedEndOfDocument, ObjectAfterProperty or ArrayAfterValue. - /// - /// - /// These states were derived manually by reading RFC 7159 carefully. - /// - /// - [Flags] - private enum State - { - /// - /// ^ { "foo": "bar" } - /// Before the value in a document. Next states: ObjectStart, ArrayStart, "AfterValue" - /// - StartOfDocument = 1 << 0, - /// - /// { "foo": "bar" } ^ - /// After the value in a document. Next states: ReaderExhausted - /// - ExpectedEndOfDocument = 1 << 1, - /// - /// { "foo": "bar" } ^ (and already read to the end of the reader) - /// Terminal state. - /// - ReaderExhausted = 1 << 2, - /// - /// { ^ "foo": "bar" } - /// Before the *first* property in an object. - /// Next states: - /// "AfterValue" (empty object) - /// ObjectBeforeColon (read a name) - /// - ObjectStart = 1 << 3, - /// - /// { "foo" ^ : "bar", "x": "y" } - /// Next state: ObjectAfterColon - /// - ObjectBeforeColon = 1 << 4, - /// - /// { "foo" : ^ "bar", "x": "y" } - /// Before any property other than the first in an object. - /// (Equivalently: after any property in an object) - /// Next states: - /// "AfterValue" (value is simple) - /// ObjectStart (value is object) - /// ArrayStart (value is array) - /// - ObjectAfterColon = 1 << 5, - /// - /// { "foo" : "bar" ^ , "x" : "y" } - /// At the end of a property, so expecting either a comma or end-of-object - /// Next states: ObjectAfterComma or "AfterValue" - /// - ObjectAfterProperty = 1 << 6, - /// - /// { "foo":"bar", ^ "x":"y" } - /// Read the comma after the previous property, so expecting another property. - /// This is like ObjectStart, but closing brace isn't valid here - /// Next state: ObjectBeforeColon. - /// - ObjectAfterComma = 1 << 7, - /// - /// [ ^ "foo", "bar" ] - /// Before the *first* value in an array. - /// Next states: - /// "AfterValue" (read a value) - /// "AfterValue" (end of array; will pop stack) - /// - ArrayStart = 1 << 8, - /// - /// [ "foo" ^ , "bar" ] - /// After any value in an array, so expecting either a comma or end-of-array - /// Next states: ArrayAfterComma or "AfterValue" - /// - ArrayAfterValue = 1 << 9, /// - /// [ "foo", ^ "bar" ] - /// After a comma in an array, so there *must* be another value (simple or complex). - /// Next states: "AfterValue" (simple value), StartObject, StartArray + /// Pops the top-most container, and sets the state to the appropriate one for the end of a value + /// in the parent container. /// - ArrayAfterComma = 1 << 10 - } - - /// - /// Wrapper around a text reader allowing small amounts of buffering and location handling. - /// - private class PushBackReader - { - // TODO: Add locations for errors etc. - - private readonly TextReader reader; + private void PopContainer() + { + containerStack.Pop(); + var parent = containerStack.Peek(); + switch (parent) + { + case ContainerType.Object: + state = State.ObjectAfterProperty; + break; + case ContainerType.Array: + state = State.ArrayAfterValue; + break; + case ContainerType.Document: + state = State.ExpectedEndOfDocument; + break; + default: + throw new InvalidOperationException("Unexpected container type: " + parent); + } + } - internal PushBackReader(TextReader reader) + private enum ContainerType { - // TODO: Wrap the reader in a BufferedReader? - this.reader = reader; + Document, Object, Array } /// - /// The buffered next character, if we have one. + /// Possible states of the tokenizer. /// - private char? nextChar; + /// + /// This is a flags enum purely so we can simply and efficiently represent a set of valid states + /// for checking. + /// + /// Each is documented with an example, + /// where ^ represents the current position within the text stream. The examples all use string values, + /// but could be any value, including nested objects/arrays. + /// The complete state of the tokenizer also includes a stack to indicate the contexts (arrays/objects). + /// Any additional notional state of "AfterValue" indicates that a value has been completed, at which + /// point there's an immediate transition to ExpectedEndOfDocument, ObjectAfterProperty or ArrayAfterValue. + /// + /// + /// These states were derived manually by reading RFC 7159 carefully. + /// + /// + [Flags] + private enum State + { + /// + /// ^ { "foo": "bar" } + /// Before the value in a document. Next states: ObjectStart, ArrayStart, "AfterValue" + /// + StartOfDocument = 1 << 0, + /// + /// { "foo": "bar" } ^ + /// After the value in a document. Next states: ReaderExhausted + /// + ExpectedEndOfDocument = 1 << 1, + /// + /// { "foo": "bar" } ^ (and already read to the end of the reader) + /// Terminal state. + /// + ReaderExhausted = 1 << 2, + /// + /// { ^ "foo": "bar" } + /// Before the *first* property in an object. + /// Next states: + /// "AfterValue" (empty object) + /// ObjectBeforeColon (read a name) + /// + ObjectStart = 1 << 3, + /// + /// { "foo" ^ : "bar", "x": "y" } + /// Next state: ObjectAfterColon + /// + ObjectBeforeColon = 1 << 4, + /// + /// { "foo" : ^ "bar", "x": "y" } + /// Before any property other than the first in an object. + /// (Equivalently: after any property in an object) + /// Next states: + /// "AfterValue" (value is simple) + /// ObjectStart (value is object) + /// ArrayStart (value is array) + /// + ObjectAfterColon = 1 << 5, + /// + /// { "foo" : "bar" ^ , "x" : "y" } + /// At the end of a property, so expecting either a comma or end-of-object + /// Next states: ObjectAfterComma or "AfterValue" + /// + ObjectAfterProperty = 1 << 6, + /// + /// { "foo":"bar", ^ "x":"y" } + /// Read the comma after the previous property, so expecting another property. + /// This is like ObjectStart, but closing brace isn't valid here + /// Next state: ObjectBeforeColon. + /// + ObjectAfterComma = 1 << 7, + /// + /// [ ^ "foo", "bar" ] + /// Before the *first* value in an array. + /// Next states: + /// "AfterValue" (read a value) + /// "AfterValue" (end of array; will pop stack) + /// + ArrayStart = 1 << 8, + /// + /// [ "foo" ^ , "bar" ] + /// After any value in an array, so expecting either a comma or end-of-array + /// Next states: ArrayAfterComma or "AfterValue" + /// + ArrayAfterValue = 1 << 9, + /// + /// [ "foo", ^ "bar" ] + /// After a comma in an array, so there *must* be another value (simple or complex). + /// Next states: "AfterValue" (simple value), StartObject, StartArray + /// + ArrayAfterComma = 1 << 10 + } /// - /// Returns the next character in the stream, or null if we have reached the end. + /// Wrapper around a text reader allowing small amounts of buffering and location handling. /// - /// - internal char? Read() + private class PushBackReader { - if (nextChar != null) + // TODO: Add locations for errors etc. + + private readonly TextReader reader; + + internal PushBackReader(TextReader reader) { - char? tmp = nextChar; - nextChar = null; - return tmp; + // TODO: Wrap the reader in a BufferedReader? + this.reader = reader; } - int next = reader.Read(); - return next == -1 ? null : (char?) next; - } - internal char ReadOrFail(string messageOnFailure) - { - char? next = Read(); - if (next == null) + /// + /// The buffered next character, if we have one. + /// + private char? nextChar; + + /// + /// Returns the next character in the stream, or null if we have reached the end. + /// + /// + internal char? Read() { - throw CreateException(messageOnFailure); + if (nextChar != null) + { + char? tmp = nextChar; + nextChar = null; + return tmp; + } + int next = reader.Read(); + return next == -1 ? null : (char?) next; } - return next.Value; - } - internal void PushBack(char c) - { - if (nextChar != null) + internal char ReadOrFail(string messageOnFailure) { - throw new InvalidOperationException("Cannot push back when already buffering a character"); + char? next = Read(); + if (next == null) + { + throw CreateException(messageOnFailure); + } + return next.Value; } - nextChar = c; - } - /// - /// Creates a new exception appropriate for the current state of the reader. - /// - internal InvalidJsonException CreateException(string message) - { - // TODO: Keep track of and use the location. - return new InvalidJsonException(message); + internal void PushBack(char c) + { + if (nextChar != null) + { + throw new InvalidOperationException("Cannot push back when already buffering a character"); + } + nextChar = c; + } + + /// + /// Creates a new exception appropriate for the current state of the reader. + /// + internal InvalidJsonException CreateException(string message) + { + // TODO: Keep track of and use the location. + return new InvalidJsonException(message); + } } } }