|
|
|
@ -0,0 +1,341 @@ |
|
|
|
|
using System; |
|
|
|
|
using System.Globalization; |
|
|
|
|
using System.Text.RegularExpressions; |
|
|
|
|
|
|
|
|
|
namespace Google.ProtocolBuffers { |
|
|
|
|
/// <summary> |
|
|
|
|
/// Represents a stream of tokens parsed from a string. |
|
|
|
|
/// </summary> |
|
|
|
|
internal sealed class TextTokenizer { |
|
|
|
|
private readonly string text; |
|
|
|
|
private string currentToken; |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// The character index within the text to perform the next regex match at. |
|
|
|
|
/// </summary> |
|
|
|
|
private int matchPos = 0; |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// The character index within the text at which the current token begins. |
|
|
|
|
/// </summary> |
|
|
|
|
private int pos = 0; |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// The line number of the current token. |
|
|
|
|
/// </summary> |
|
|
|
|
private int line = 0; |
|
|
|
|
/// <summary> |
|
|
|
|
/// The column number of the current token. |
|
|
|
|
/// </summary> |
|
|
|
|
private int column = 0; |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// The line number of the previous token. |
|
|
|
|
/// </summary> |
|
|
|
|
private int previousLine = 0; |
|
|
|
|
/// <summary> |
|
|
|
|
/// The column number of the previous token. |
|
|
|
|
/// </summary> |
|
|
|
|
private int previousColumn = 0; |
|
|
|
|
|
|
|
|
|
private static Regex WhitespaceAndCommentPattern = new Regex("\\G(\\s|(#[^\\\n]*\\n))+", RegexOptions.Compiled); |
|
|
|
|
private static Regex TokenPattern = new Regex( |
|
|
|
|
"\\G[a-zA-Z_][0-9a-zA-Z_+-]*|" + // an identifier |
|
|
|
|
"\\G[0-9+-][0-9a-zA-Z_.+-]*|" + // a number |
|
|
|
|
"\\G\"([^\"\\\n\\\\]|\\\\[^\\\n])*(\"|\\\\?$)|" + // a double-quoted string |
|
|
|
|
"\\G\'([^\"\\\n\\\\]|\\\\[^\\\n])*(\'|\\\\?$)", // a single-quoted string |
|
|
|
|
RegexOptions.Compiled); |
|
|
|
|
|
|
|
|
|
/** Construct a tokenizer that parses tokens from the given text. */ |
|
|
|
|
public TextTokenizer(string text) { |
|
|
|
|
this.text = text; |
|
|
|
|
SkipWhitespace(); |
|
|
|
|
NextToken(); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// Are we at the end of the input? |
|
|
|
|
/// </summary> |
|
|
|
|
public bool AtEnd { |
|
|
|
|
get { return currentToken.Length == 0; } |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// Advances to the next token. |
|
|
|
|
/// </summary> |
|
|
|
|
public void NextToken() { |
|
|
|
|
previousLine = line; |
|
|
|
|
previousColumn = column; |
|
|
|
|
|
|
|
|
|
// Advance the line counter to the current position. |
|
|
|
|
while (pos < matchPos) { |
|
|
|
|
if (text[pos] == '\n') { |
|
|
|
|
++line; |
|
|
|
|
column = 0; |
|
|
|
|
} else { |
|
|
|
|
++column; |
|
|
|
|
} |
|
|
|
|
++pos; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Match the next token. |
|
|
|
|
if (matchPos == text.Length) { |
|
|
|
|
// EOF |
|
|
|
|
currentToken = ""; |
|
|
|
|
} else { |
|
|
|
|
Match match = TokenPattern.Match(text, matchPos); |
|
|
|
|
if (match.Success) { |
|
|
|
|
currentToken = match.Value; |
|
|
|
|
matchPos += match.Length; |
|
|
|
|
} else { |
|
|
|
|
// Take one character. |
|
|
|
|
currentToken = text[matchPos].ToString(); |
|
|
|
|
matchPos++; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
SkipWhitespace(); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// Skip over any whitespace so that matchPos starts at the next token. |
|
|
|
|
/// </summary> |
|
|
|
|
private void SkipWhitespace() { |
|
|
|
|
Match match = WhitespaceAndCommentPattern.Match(text, matchPos); |
|
|
|
|
if (match.Success) { |
|
|
|
|
matchPos += match.Length; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// If the next token exactly matches the given token, consume it and return |
|
|
|
|
/// true. Otherwise, return false without doing anything. |
|
|
|
|
/// </summary> |
|
|
|
|
public bool TryConsume(string token) { |
|
|
|
|
if (currentToken == token) { |
|
|
|
|
NextToken(); |
|
|
|
|
return true; |
|
|
|
|
} |
|
|
|
|
return false; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/* |
|
|
|
|
* If the next token exactly matches {@code token}, consume it. Otherwise, |
|
|
|
|
* throw a {@link ParseException}. |
|
|
|
|
*/ |
|
|
|
|
/// <summary> |
|
|
|
|
/// If the next token exactly matches the specified one, consume it. |
|
|
|
|
/// Otherwise, throw a FormatException. |
|
|
|
|
/// </summary> |
|
|
|
|
/// <param name="token"></param> |
|
|
|
|
public void Consume(string token) { |
|
|
|
|
if (!TryConsume(token)) { |
|
|
|
|
throw CreateFormatException("Expected \"" + token + "\"."); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// Returns true if the next token is an integer, but does not consume it. |
|
|
|
|
/// </summary> |
|
|
|
|
public bool LookingAtInteger() { |
|
|
|
|
if (currentToken.Length == 0) { |
|
|
|
|
return false; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
char c = currentToken[0]; |
|
|
|
|
return ('0' <= c && c <= '9') || c == '-' || c == '+'; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// If the next token is an identifier, consume it and return its value. |
|
|
|
|
/// Otherwise, throw a FormatException. |
|
|
|
|
/// </summary> |
|
|
|
|
public string ConsumeIdentifier() { |
|
|
|
|
foreach (char c in currentToken) { |
|
|
|
|
if (('a' <= c && c <= 'z') || |
|
|
|
|
('A' <= c && c <= 'Z') || |
|
|
|
|
('0' <= c && c <= '9') || |
|
|
|
|
(c == '_') || (c == '.')) { |
|
|
|
|
// OK |
|
|
|
|
} else { |
|
|
|
|
throw CreateFormatException("Expected identifier."); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
string result = currentToken; |
|
|
|
|
NextToken(); |
|
|
|
|
return result; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// If the next token is a 32-bit signed integer, consume it and return its |
|
|
|
|
/// value. Otherwise, throw a FormatException. |
|
|
|
|
/// </summary> |
|
|
|
|
public int ConsumeInt32() { |
|
|
|
|
try { |
|
|
|
|
int result = TextFormat.ParseInt32(currentToken); |
|
|
|
|
NextToken(); |
|
|
|
|
return result; |
|
|
|
|
} catch (FormatException e) { |
|
|
|
|
throw CreateIntegerParseException(e); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// If the next token is a 32-bit unsigned integer, consume it and return its |
|
|
|
|
/// value. Otherwise, throw a FormatException. |
|
|
|
|
/// </summary> |
|
|
|
|
public uint ConsumeUInt32() { |
|
|
|
|
try { |
|
|
|
|
uint result = TextFormat.ParseUInt32(currentToken); |
|
|
|
|
NextToken(); |
|
|
|
|
return result; |
|
|
|
|
} catch (FormatException e) { |
|
|
|
|
throw CreateIntegerParseException(e); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// If the next token is a 64-bit signed integer, consume it and return its |
|
|
|
|
/// value. Otherwise, throw a FormatException. |
|
|
|
|
/// </summary> |
|
|
|
|
public long ConsumeInt64() { |
|
|
|
|
try { |
|
|
|
|
long result = TextFormat.ParseInt64(currentToken); |
|
|
|
|
NextToken(); |
|
|
|
|
return result; |
|
|
|
|
} catch (FormatException e) { |
|
|
|
|
throw CreateIntegerParseException(e); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// If the next token is a 64-bit unsigned integer, consume it and return its |
|
|
|
|
/// value. Otherwise, throw a FormatException. |
|
|
|
|
/// </summary> |
|
|
|
|
public ulong ConsumeUInt64() { |
|
|
|
|
try { |
|
|
|
|
ulong result = TextFormat.ParseUInt64(currentToken); |
|
|
|
|
NextToken(); |
|
|
|
|
return result; |
|
|
|
|
} catch (FormatException e) { |
|
|
|
|
throw CreateIntegerParseException(e); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// If the next token is a double, consume it and return its value. |
|
|
|
|
/// Otherwise, throw a FormatException. |
|
|
|
|
/// </summary> |
|
|
|
|
public double ConsumeDouble() { |
|
|
|
|
try { |
|
|
|
|
double result = double.Parse(currentToken, CultureInfo.InvariantCulture); |
|
|
|
|
NextToken(); |
|
|
|
|
return result; |
|
|
|
|
} catch (FormatException e) { |
|
|
|
|
throw CreateFloatParseException(e); |
|
|
|
|
} catch (OverflowException e) { |
|
|
|
|
throw CreateFloatParseException(e); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// If the next token is a float, consume it and return its value. |
|
|
|
|
/// Otherwise, throw a FormatException. |
|
|
|
|
/// </summary> |
|
|
|
|
public float consumeFloat() { |
|
|
|
|
try { |
|
|
|
|
float result = float.Parse(currentToken, CultureInfo.InvariantCulture); |
|
|
|
|
NextToken(); |
|
|
|
|
return result; |
|
|
|
|
} catch (FormatException e) { |
|
|
|
|
throw CreateFloatParseException(e); |
|
|
|
|
} catch (OverflowException e) { |
|
|
|
|
throw CreateFloatParseException(e); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// If the next token is a Boolean, consume it and return its value. |
|
|
|
|
/// Otherwise, throw a FormatException. |
|
|
|
|
/// </summary> |
|
|
|
|
public bool ConsumeBoolean() { |
|
|
|
|
if (currentToken == "true") { |
|
|
|
|
NextToken(); |
|
|
|
|
return true; |
|
|
|
|
} |
|
|
|
|
if (currentToken == "false") { |
|
|
|
|
NextToken(); |
|
|
|
|
return false; |
|
|
|
|
} |
|
|
|
|
throw CreateFormatException("Expected \"true\" or \"false\"."); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// If the next token is a string, consume it and return its (unescaped) value. |
|
|
|
|
/// Otherwise, throw a FormatException. |
|
|
|
|
/// </summary> |
|
|
|
|
public string ConsumeString() { |
|
|
|
|
return ConsumeByteString().ToStringUtf8(); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// If the next token is a string, consume it, unescape it as a |
|
|
|
|
/// ByteString and return it. Otherwise, throw a FormatException. |
|
|
|
|
/// </summary> |
|
|
|
|
public ByteString ConsumeByteString() { |
|
|
|
|
char quote = currentToken.Length > 0 ? currentToken[0] : '\0'; |
|
|
|
|
if (quote != '\"' && quote != '\'') { |
|
|
|
|
throw CreateFormatException("Expected string."); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (currentToken.Length < 2 || |
|
|
|
|
currentToken[currentToken.Length-1] != quote) { |
|
|
|
|
throw CreateFormatException("String missing ending quote."); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
try { |
|
|
|
|
string escaped = currentToken.Substring(1, currentToken.Length - 2); |
|
|
|
|
ByteString result = TextFormat.UnescapeBytes(escaped); |
|
|
|
|
NextToken(); |
|
|
|
|
return result; |
|
|
|
|
} catch (FormatException e) { |
|
|
|
|
throw CreateFormatException(e.Message); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// Returns a format exception with the current line and column numbers |
|
|
|
|
/// in the description, suitable for throwing. |
|
|
|
|
/// </summary> |
|
|
|
|
public FormatException CreateFormatException(string description) { |
|
|
|
|
// Note: People generally prefer one-based line and column numbers. |
|
|
|
|
return new FormatException((line + 1) + ":" + (column + 1) + ": " + description); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// Returns a format exception with the line and column numbers of the |
|
|
|
|
/// previous token in the description, suitable for throwing. |
|
|
|
|
/// </summary> |
|
|
|
|
public FormatException CreateFormatExceptionPreviousToken(string description) { |
|
|
|
|
// Note: People generally prefer one-based line and column numbers. |
|
|
|
|
return new FormatException((previousLine + 1) + ":" + (previousColumn + 1) + ": " + description); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// Constructs an appropriate FormatException for the given existing exception |
|
|
|
|
/// when trying to parse an integer. |
|
|
|
|
/// </summary> |
|
|
|
|
private FormatException CreateIntegerParseException(FormatException e) { |
|
|
|
|
return CreateFormatException("Couldn't parse integer: " + e.Message); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// <summary> |
|
|
|
|
/// Constructs an appropriate FormatException for the given existing exception |
|
|
|
|
/// when trying to parse a float or double. |
|
|
|
|
/// </summary> |
|
|
|
|
private FormatException CreateFloatParseException(Exception e) { |
|
|
|
|
return CreateFormatException("Couldn't parse number: " + e.Message); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |