C#: Optimize JSON parsing in JsonTokenizer

PiperOrigin-RevId: 618097513
pull/16238/head
Protobuf Team Bot 1 year ago committed by Copybara-Service
parent b361c9ca9e
commit 68e6e3e8ee
  1. 117
      csharp/src/Google.Protobuf/JsonTokenizer.cs

@ -1,4 +1,4 @@
#region Copyright notice and license #region Copyright notice and license
// Protocol Buffers - Google's data interchange format // Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved. // Copyright 2008 Google Inc. All rights reserved.
// //
@ -204,14 +204,13 @@ namespace Google.Protobuf
while (true) while (true)
{ {
var next = reader.Read(); var next = reader.Read();
if (next == null) switch (next)
{
ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: ");
state = State.ReaderExhausted;
return JsonToken.EndDocument;
}
switch (next.Value)
{ {
case -1:
ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: ");
state = State.ReaderExhausted;
return JsonToken.EndDocument;
// Skip whitespace between tokens // Skip whitespace between tokens
case ' ': case ' ':
case '\t': case '\t':
@ -279,11 +278,11 @@ namespace Google.Protobuf
case '7': case '7':
case '8': case '8':
case '9': case '9':
double number = ReadNumber(next.Value); double number = ReadNumber((char) next);
ValidateAndModifyStateForValue("Invalid state to read a number token: "); ValidateAndModifyStateForValue("Invalid state to read a number token: ");
return JsonToken.Value(number); return JsonToken.Value(number);
default: default:
throw new InvalidJsonException("Invalid first character of token: " + next.Value); throw new InvalidJsonException($"Invalid first character of token: {(char) next}");
} }
} }
} }
@ -395,14 +394,15 @@ namespace Google.Protobuf
{ {
for (int i = 1; i < text.Length; i++) for (int i = 1; i < text.Length; i++)
{ {
char? next = reader.Read(); int next = reader.Read();
if (next == null) if (next != text[i])
{
throw reader.CreateException("Unexpected end of text while reading literal token " + text);
}
if (next.Value != text[i])
{ {
throw reader.CreateException("Unexpected character while reading literal token " + text); // Only check for "end of text" when we've detected that the character differs from the
// expected one.
var message = next == -1
? $"Unexpected end of text while reading literal token {text}"
: $"Unexpected character while reading literal token {text}";
throw reader.CreateException(message);
} }
} }
} }
@ -422,7 +422,7 @@ namespace Google.Protobuf
// Each method returns the character it read that doesn't belong in that part, // Each method returns the character it read that doesn't belong in that part,
// so we know what to do next, including pushing the character back at the end. // so we know what to do next, including pushing the character back at the end.
// null is returned for "end of text". // -1 is returned for "end of text".
char? next = ReadInt(builder); int next = ReadInt(builder);
if (next == '.') if (next == '.')
{ {
next = ReadFrac(builder); next = ReadFrac(builder);
@ -433,9 +433,9 @@ namespace Google.Protobuf
} }
// If we read a character which wasn't part of the number, push it back so we can read it again // If we read a character which wasn't part of the number, push it back so we can read it again
// to parse the next token. // to parse the next token.
if (next != null) if (next != -1)
{ {
reader.PushBack(next.Value); reader.PushBack((char) next);
} }
// TODO: What exception should we throw if the value can't be represented as a double? // TODO: What exception should we throw if the value can't be represented as a double?
@ -461,7 +461,12 @@ namespace Google.Protobuf
} }
} }
private char? ReadInt(StringBuilder builder) /// <summary>
/// Copies an integer into a StringBuilder.
/// </summary>
/// <param name="builder">The builder to read the number into</param>
/// <returns>The character following the integer, or -1 for end-of-text.</returns>
private int ReadInt(StringBuilder builder)
{ {
char first = reader.ReadOrFail("Invalid numeric literal"); char first = reader.ReadOrFail("Invalid numeric literal");
if (first < '0' || first > '9') if (first < '0' || first > '9')
@ -469,7 +474,7 @@ namespace Google.Protobuf
throw reader.CreateException("Invalid numeric literal"); throw reader.CreateException("Invalid numeric literal");
} }
builder.Append(first); builder.Append(first);
char? next = ConsumeDigits(builder, out int digitCount); int next = ConsumeDigits(builder, out int digitCount);
if (first == '0' && digitCount != 0) if (first == '0' && digitCount != 0)
{ {
throw reader.CreateException("Invalid numeric literal: leading 0 for non-zero value."); throw reader.CreateException("Invalid numeric literal: leading 0 for non-zero value.");
@ -477,10 +482,15 @@ namespace Google.Protobuf
return next; return next;
} }
private char? ReadFrac(StringBuilder builder) /// <summary>
/// Copies the fractional part of a number into a StringBuilder, assuming reader is positioned after a period.
/// </summary>
/// <param name="builder">The builder to read the number into</param>
/// <returns>The character following the fractional part, or -1 for end-of-text.</returns>
private int ReadFrac(StringBuilder builder)
{ {
builder.Append('.'); // Already consumed this builder.Append('.'); // Already consumed this
char? next = ConsumeDigits(builder, out int digitCount); int next = ConsumeDigits(builder, out int digitCount);
if (digitCount == 0) if (digitCount == 0)
{ {
throw reader.CreateException("Invalid numeric literal: fraction with no trailing digits"); throw reader.CreateException("Invalid numeric literal: fraction with no trailing digits");
@ -488,21 +498,26 @@ namespace Google.Protobuf
return next; return next;
} }
private char? ReadExp(StringBuilder builder) /// <summary>
/// Copies the exponent part of a number into a StringBuilder, with an assumption that the reader is already positioned after the "e".
/// </summary>
/// <param name="builder">The builder to read the number into</param>
/// <returns>The character following the exponent, or -1 for end-of-text.</returns>
private int ReadExp(StringBuilder builder)
{ {
builder.Append('E'); // Already consumed this (or 'e') builder.Append('E'); // Already consumed this (or 'e')
char? next = reader.Read(); int next = reader.Read();
if (next == null) if (next == -1)
{ {
throw reader.CreateException("Invalid numeric literal: exponent with no trailing digits"); throw reader.CreateException("Invalid numeric literal: exponent with no trailing digits");
} }
if (next == '-' || next == '+') if (next == '-' || next == '+')
{ {
builder.Append(next.Value); builder.Append((char) next);
} }
else else
{ {
reader.PushBack(next.Value); reader.PushBack((char) next);
} }
next = ConsumeDigits(builder, out int digitCount); next = ConsumeDigits(builder, out int digitCount);
if (digitCount == 0) if (digitCount == 0)
@ -512,18 +527,24 @@ namespace Google.Protobuf
return next; return next;
} }
private char? ConsumeDigits(StringBuilder builder, out int count) /// <summary>
/// Copies a sequence of digits into a StringBuilder.
/// </summary>
/// <param name="builder">The builder to read the number into</param>
/// <param name="count">The number of digits appended to the builder</param>
/// <returns>The character following the digits, or -1 for end-of-text.</returns>
private int ConsumeDigits(StringBuilder builder, out int count)
{ {
count = 0; count = 0;
while (true) while (true)
{ {
char? next = reader.Read(); int next = reader.Read();
if (next == null || next.Value < '0' || next.Value > '9') if (next == -1 || next < '0' || next > '9')
{ {
return next; return next;
} }
count++; count++;
builder.Append(next.Value); builder.Append((char) next);
} }
} }
@ -683,39 +704,41 @@ namespace Google.Protobuf
} }
/// <summary> /// <summary>
/// The buffered next character, if we have one. /// The buffered next character, if we have one, or -1 if there is no buffered character.
/// </summary> /// </summary>
private char? nextChar; private int nextChar = -1;
/// <summary> /// <summary>
/// Returns the next character in the stream, or null if we have reached the end. /// Returns the next character in the stream, or -1 if we have reached the end of the stream.
/// </summary> /// </summary>
/// <returns></returns> internal int Read()
internal char? Read()
{ {
if (nextChar != null) if (nextChar != -1)
{ {
char? tmp = nextChar; int tmp = nextChar;
nextChar = null; nextChar = -1;
return tmp; return tmp;
} }
int next = reader.Read(); return reader.Read();
return next == -1 ? null : (char?) next;
} }
/// <summary>
/// Reads the next character from the underlying reader, throwing an <see cref="InvalidJsonException" />
/// with the specified message if there are no more characters available.
/// </summary>
internal char ReadOrFail(string messageOnFailure) internal char ReadOrFail(string messageOnFailure)
{ {
char? next = Read(); int next = Read();
if (next == null) if (next == -1)
{ {
throw CreateException(messageOnFailure); throw CreateException(messageOnFailure);
} }
return next.Value; return (char) next;
} }
internal void PushBack(char c) internal void PushBack(char c)
{ {
if (nextChar != null) if (nextChar != -1)
{ {
throw new InvalidOperationException("Cannot push back when already buffering a character"); throw new InvalidOperationException("Cannot push back when already buffering a character");
} }

Loading…
Cancel
Save