simplify the tokenizer

- remove previous token from the public api
- remove upb_Token type

PiperOrigin-RevId: 469308543
Branch: pull/13171/head
Author: Eric Salo (committed by Copybara-Service)
parent e9f4211f3b
commit 33114209dc
  1. upb/io/tokenizer.c (149 changed lines)
  2. upb/io/tokenizer.h (17 changed lines)
  3. upb/io/tokenizer_test.cc (125 changed lines)
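
With the upb_Tokenizer_Previous*() accessors removed from the public API, a caller that still needs the prior token's position (for adjacency checks such as the identifier/decimal-point case in the diff below) can snapshot the current token before each Next() call. A minimal caller-side sketch, assuming only the accessors declared in upb/io/tokenizer.h; the PrevToken struct and the Scan() loop are hypothetical illustrations, not part of upb:

#include "upb/io/tokenizer.h"

// Hypothetical caller-side bookkeeping replacing upb_Tokenizer_Previous*().
typedef struct {
  upb_TokenType type;
  int line;
  int column;
  int end_column;
} PrevToken;

static void Scan(upb_Tokenizer* t) {
  PrevToken prev = {kUpb_TokenType_Start, 0, 0, 0};
  while (upb_Tokenizer_Next(t)) {
    // ... inspect the current token via upb_Tokenizer_Type(t) etc. ...

    // Snapshot the fields we care about before advancing again.
    prev.type = upb_Tokenizer_Type(t);
    prev.line = upb_Tokenizer_Line(t);
    prev.column = upb_Tokenizer_Column(t);
    prev.end_column = upb_Tokenizer_EndColumn(t);
  }
  (void)prev;  // Unused in this sketch.
}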

--- upb/io/tokenizer.c
+++ upb/io/tokenizer.c
@@ -150,35 +150,18 @@ static char TranslateEscape(char c) {
 // ===================================================================
 
-// Structure representing a token read from the token stream.
-typedef struct {
-  upb_TokenType type;
-
-  // "line" and "column" specify the position of the first character of
-  // the token within the input stream. They are zero-based.
-  int line;
-  int column;
-  int end_column;
-
-  // The exact text of the token as it appeared in the input.
-  // e.g. tokens of TYPE_STRING will still be escaped and in quotes.
-  upb_String text;
-} upb_Token;
-
-static upb_Token* upb_Token_Init(upb_Token* t, upb_Arena* arena) {
-  upb_String_Init(&t->text, arena);
-
-  t->type = kUpb_TokenType_Start;
-  t->line = 0;
-  t->column = 0;
-  t->end_column = 0;
-  return t;
-}
-
-// ===================================================================
-
 struct upb_Tokenizer {
-  upb_Token current;
+  upb_TokenType token_type;  // The type of the current token.
+
+  // The exact text of the current token as it appeared in the input.
+  // e.g. tokens of TYPE_STRING will still be escaped and in quotes.
+  upb_String token_text;
+
+  // "line" and "column" specify the position of the first character of
+  // the token within the input stream. They are zero-based.
+  int token_line;
+  int token_column;
+  int token_end_column;
+
   upb_ZeroCopyInputStream* input;
   upb_ErrorCollector* error_collector;
@@ -296,11 +279,11 @@ static void StopRecording(upb_Tokenizer* t) {
 // Called when the current character is the first character of a new
 // token (not including whitespace or comments).
 static void StartToken(upb_Tokenizer* t) {
-  t->current.type = kUpb_TokenType_Start;
-  upb_String_Clear(&t->current.text);
-  t->current.line = t->line;
-  t->current.column = t->column;
-  RecordTo(t, &t->current.text);
+  t->token_type = kUpb_TokenType_Start;
+  upb_String_Clear(&t->token_text);
+  t->token_line = t->line;
+  t->token_column = t->column;
+  RecordTo(t, &t->token_text);
 }
 
 // Called when the current character is the first character after the
@@ -308,7 +291,7 @@ static void StartToken(upb_Tokenizer* t) {
 // contain all text consumed since StartToken() was called.
 static void EndToken(upb_Tokenizer* t) {
   StopRecording(t);
-  t->current.end_column = t->column;
+  t->token_end_column = t->column;
 }
 
 // -----------------------------------------------------------------
@@ -571,11 +554,11 @@ static upb_CommentType TryConsumeCommentStart(upb_Tokenizer* t) {
       return kUpb_CommentType_Block;
     } else {
       // Oops, it was just a slash. Return it.
-      t->current.type = kUpb_TokenType_Symbol;
-      upb_String_Assign(&t->current.text, "/", 1);
-      t->current.line = t->line;
-      t->current.column = t->column - 1;
-      t->current.end_column = t->column;
+      t->token_type = kUpb_TokenType_Symbol;
+      upb_String_Assign(&t->token_text, "/", 1);
+      t->token_line = t->line;
+      t->token_column = t->column - 1;
+      t->token_end_column = t->column;
       return kUpb_CommentType_SlashNot;
     }
   } else if (style_sh && TryConsume(t, '#')) {
@@ -591,14 +574,14 @@ static bool TryConsumeWhitespace(upb_Tokenizer* t) {
   if (t->options & kUpb_TokenizerOption_ReportNewlines) {
     if (TryConsumeOne(t, upb_Tokenizer_IsWhitespaceNoNewline)) {
       ConsumeZeroOrMore(t, upb_Tokenizer_IsWhitespaceNoNewline);
-      t->current.type = kUpb_TokenType_Whitespace;
+      t->token_type = kUpb_TokenType_Whitespace;
       return true;
     }
     return false;
   }
   if (TryConsumeOne(t, upb_Tokenizer_IsWhitespace)) {
     ConsumeZeroOrMore(t, upb_Tokenizer_IsWhitespace);
-    t->current.type = kUpb_TokenType_Whitespace;
+    t->token_type = kUpb_TokenType_Whitespace;
     return (t->options & kUpb_TokenizerOption_ReportWhitespace) != 0;
   }
   return false;
@@ -609,7 +592,7 @@ static bool TryConsumeWhitespace(upb_Tokenizer* t) {
 static bool TryConsumeNewline(upb_Tokenizer* t) {
   if (t->options & kUpb_TokenizerOption_ReportNewlines) {
     if (TryConsume(t, '\n')) {
-      t->current.type = kUpb_TokenType_Newline;
+      t->token_type = kUpb_TokenType_Newline;
       return true;
     }
   }
@@ -618,51 +601,31 @@ static bool TryConsumeNewline(upb_Tokenizer* t) {
 // -------------------------------------------------------------------
 
-int upb_Tokenizer_CurrentColumn(const upb_Tokenizer* t) {
-  return t->current.column;
-}
+int upb_Tokenizer_Column(const upb_Tokenizer* t) { return t->token_column; }
 
-int upb_Tokenizer_CurrentEndColumn(const upb_Tokenizer* t) {
-  return t->current.end_column;
+int upb_Tokenizer_EndColumn(const upb_Tokenizer* t) {
+  return t->token_end_column;
 }
 
-int upb_Tokenizer_CurrentLine(const upb_Tokenizer* t) {
-  return t->current.line;
-}
+int upb_Tokenizer_Line(const upb_Tokenizer* t) { return t->token_line; }
 
-int upb_Tokenizer_CurrentTextSize(const upb_Tokenizer* t) {
-  return t->current.text.size_;
+int upb_Tokenizer_TextSize(const upb_Tokenizer* t) {
+  return t->token_text.size_;
 }
 
-const char* upb_Tokenizer_CurrentTextData(const upb_Tokenizer* t) {
-  return t->current.text.data_;
+const char* upb_Tokenizer_TextData(const upb_Tokenizer* t) {
+  return t->token_text.data_;
 }
 
-upb_TokenType upb_Tokenizer_CurrentType(const upb_Tokenizer* t) {
-  return t->current.type;
-}
-
-int upb_Tokenizer_PreviousColumn(const upb_Tokenizer* t) {
-  return t->previous_column;
-}
-
-int upb_Tokenizer_PreviousEndColumn(const upb_Tokenizer* t) {
-  return t->previous_end_column;
-}
-
-int upb_Tokenizer_PreviousLine(const upb_Tokenizer* t) {
-  return t->previous_line;
-}
-
-upb_TokenType upb_Tokenizer_PreviousType(const upb_Tokenizer* t) {
-  return t->previous_type;
+upb_TokenType upb_Tokenizer_Type(const upb_Tokenizer* t) {
+  return t->token_type;
 }
 
 bool upb_Tokenizer_Next(upb_Tokenizer* t) {
-  t->previous_type = t->current.type;
-  t->previous_line = t->current.line;
-  t->previous_column = t->current.column;
-  t->previous_end_column = t->current.end_column;
+  t->previous_type = t->token_type;
+  t->previous_line = t->token_line;
+  t->previous_column = t->token_column;
+  t->previous_end_column = t->token_end_column;
 
   while (!t->read_error) {
     StartToken(t);
@@ -707,9 +670,9 @@ bool upb_Tokenizer_Next(upb_Tokenizer* t) {
     if (TryConsumeOne(t, upb_Tokenizer_IsLetter)) {
       ConsumeZeroOrMore(t, upb_Tokenizer_IsAlphanumeric);
-      t->current.type = kUpb_TokenType_Identifier;
+      t->token_type = kUpb_TokenType_Identifier;
     } else if (TryConsume(t, '0')) {
-      t->current.type = ConsumeNumber(t, true, false);
+      t->token_type = ConsumeNumber(t, true, false);
     } else if (TryConsume(t, '.')) {
       // This could be the beginning of a floating-point number, or it could
       // just be a '.' symbol.
@@ -717,26 +680,26 @@ bool upb_Tokenizer_Next(upb_Tokenizer* t) {
       if (TryConsumeOne(t, upb_Tokenizer_IsDigit)) {
         // It's a floating-point number.
         if (t->previous_type == kUpb_TokenType_Identifier &&
-            t->current.line == t->previous_line &&
-            t->current.column == t->previous_end_column) {
+            t->token_line == t->previous_line &&
+            t->token_column == t->previous_end_column) {
           // We don't accept syntax like "blah.123".
           t->error_collector->AddError(
               t->line, t->column - 2,
               "Need space between identifier and decimal point.",
               t->error_collector->context);
         }
-        t->current.type = ConsumeNumber(t, false, true);
+        t->token_type = ConsumeNumber(t, false, true);
       } else {
-        t->current.type = kUpb_TokenType_Symbol;
+        t->token_type = kUpb_TokenType_Symbol;
       }
     } else if (TryConsumeOne(t, upb_Tokenizer_IsDigit)) {
-      t->current.type = ConsumeNumber(t, false, false);
+      t->token_type = ConsumeNumber(t, false, false);
    } else if (TryConsume(t, '\"')) {
      ConsumeString(t, '\"');
-     t->current.type = kUpb_TokenType_String;
+     t->token_type = kUpb_TokenType_String;
    } else if (TryConsume(t, '\'')) {
      ConsumeString(t, '\'');
-     t->current.type = kUpb_TokenType_String;
+     t->token_type = kUpb_TokenType_String;
    } else {
      // Check if the high order bit is set.
      if (t->current_char & 0x80) {
@@ -747,7 +710,7 @@ bool upb_Tokenizer_Next(upb_Tokenizer* t) {
             t->error_collector->context);
       }
       NextChar(t);
-      t->current.type = kUpb_TokenType_Symbol;
+      t->token_type = kUpb_TokenType_Symbol;
     }
 
     EndToken(t);
@@ -756,11 +719,11 @@ bool upb_Tokenizer_Next(upb_Tokenizer* t) {
   }
 
   // EOF
-  t->current.type = kUpb_TokenType_End;
-  upb_String_Clear(&t->current.text);
-  t->current.line = t->line;
-  t->current.column = t->column;
-  t->current.end_column = t->column;
+  t->token_type = kUpb_TokenType_End;
+  upb_String_Clear(&t->token_text);
+  t->token_line = t->line;
+  t->token_column = t->column;
+  t->token_end_column = t->column;
   return false;
 }
@@ -1038,13 +1001,17 @@ upb_Tokenizer* upb_Tokenizer_New(const void* data, size_t size,
   }
   t->options = options;
 
+  upb_String_Init(&t->token_text, arena);
+  t->token_type = kUpb_TokenType_Start;
+  t->token_line = 0;
+  t->token_column = 0;
+  t->token_end_column = 0;
+
   t->previous_type = kUpb_TokenType_Start;
   t->previous_line = 0;
   t->previous_column = 0;
   t->previous_end_column = 0;
 
-  upb_Token_Init(&t->current, arena);
-
   if (size) {
     t->current_char = t->buffer[0];
   } else {

--- upb/io/tokenizer.h
+++ upb/io/tokenizer.h
@@ -132,17 +132,12 @@ bool upb_Tokenizer_Next(upb_Tokenizer* t);
 // Accessors for inspecting current/previous parse tokens,
 // which are opaque to the tokenizer (to reduce copying).
 
-upb_TokenType upb_Tokenizer_CurrentType(const upb_Tokenizer* t);
-int upb_Tokenizer_CurrentColumn(const upb_Tokenizer* t);
-int upb_Tokenizer_CurrentEndColumn(const upb_Tokenizer* t);
-int upb_Tokenizer_CurrentLine(const upb_Tokenizer* t);
-int upb_Tokenizer_CurrentTextSize(const upb_Tokenizer* t);
-const char* upb_Tokenizer_CurrentTextData(const upb_Tokenizer* t);
-
-upb_TokenType upb_Tokenizer_PreviousType(const upb_Tokenizer* t);
-int upb_Tokenizer_PreviousColumn(const upb_Tokenizer* t);
-int upb_Tokenizer_PreviousEndColumn(const upb_Tokenizer* t);
-int upb_Tokenizer_PreviousLine(const upb_Tokenizer* t);
+upb_TokenType upb_Tokenizer_Type(const upb_Tokenizer* t);
+int upb_Tokenizer_Column(const upb_Tokenizer* t);
+int upb_Tokenizer_EndColumn(const upb_Tokenizer* t);
+int upb_Tokenizer_Line(const upb_Tokenizer* t);
+int upb_Tokenizer_TextSize(const upb_Tokenizer* t);
+const char* upb_Tokenizer_TextData(const upb_Tokenizer* t);
 
 // Parses a TYPE_INTEGER token. Returns false if the result would be
 // greater than max_value. Otherwise, returns true and sets *output to the
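
The simplified header reduces a typical scan to the current-token accessors only. A hedged usage sketch (the DumpTokens() helper and printf formatting are illustrative assumptions, not upb code; `t` is assumed to come from upb_Tokenizer_New() as in the tests below):

#include <stdio.h>
#include "upb/io/tokenizer.h"

// Sketch: print each token with its zero-based position.
static void DumpTokens(upb_Tokenizer* t) {
  while (upb_Tokenizer_Next(t)) {
    printf("%d:%d-%d type=%d text=%.*s\n",
           upb_Tokenizer_Line(t), upb_Tokenizer_Column(t),
           upb_Tokenizer_EndColumn(t), (int)upb_Tokenizer_Type(t),
           upb_Tokenizer_TextSize(t), upb_Tokenizer_TextData(t));
  }
}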

--- upb/io/tokenizer_test.cc
+++ upb/io/tokenizer_test.cc
@@ -244,37 +244,34 @@ TEST_2D(TokenizerTest, SimpleTokens, kSimpleTokenCases, kBlockSizes) {
   auto t = upb_Tokenizer_New(NULL, 0, input, error_collector, 0, arena.ptr());
 
   // Before Next() is called, the initial token should always be TYPE_START.
-  EXPECT_EQ(upb_Tokenizer_CurrentType(t), kUpb_TokenType_Start);
-  EXPECT_EQ(upb_Tokenizer_CurrentLine(t), 0);
-  EXPECT_EQ(upb_Tokenizer_CurrentColumn(t), 0);
-  EXPECT_EQ(upb_Tokenizer_CurrentEndColumn(t), 0);
-  EXPECT_TRUE(StringEquals(upb_Tokenizer_CurrentTextData(t), ""));
+  EXPECT_EQ(upb_Tokenizer_Type(t), kUpb_TokenType_Start);
+  EXPECT_EQ(upb_Tokenizer_Line(t), 0);
+  EXPECT_EQ(upb_Tokenizer_Column(t), 0);
+  EXPECT_EQ(upb_Tokenizer_EndColumn(t), 0);
+  EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t), ""));
 
   // Parse the token.
   EXPECT_TRUE(upb_Tokenizer_Next(t));
 
   // Check that it has the right type.
-  EXPECT_EQ(upb_Tokenizer_CurrentType(t), kSimpleTokenCases_case.type);
+  EXPECT_EQ(upb_Tokenizer_Type(t), kSimpleTokenCases_case.type);
   // Check that it contains the complete input text.
-  EXPECT_TRUE(StringEquals(upb_Tokenizer_CurrentTextData(t),
+  EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t),
                            kSimpleTokenCases_case.input.data()));
   // Check that it is located at the beginning of the input
-  EXPECT_EQ(upb_Tokenizer_CurrentLine(t), 0);
-  EXPECT_EQ(upb_Tokenizer_CurrentColumn(t), 0);
-  EXPECT_EQ(upb_Tokenizer_CurrentEndColumn(t),
-            kSimpleTokenCases_case.input.size());
+  EXPECT_EQ(upb_Tokenizer_Line(t), 0);
+  EXPECT_EQ(upb_Tokenizer_Column(t), 0);
+  EXPECT_EQ(upb_Tokenizer_EndColumn(t), kSimpleTokenCases_case.input.size());
 
   // There should be no more input.
   EXPECT_FALSE(upb_Tokenizer_Next(t));
 
   // After Next() returns false, the token should have type TYPE_END.
-  EXPECT_EQ(upb_Tokenizer_CurrentType(t), kUpb_TokenType_End);
-  EXPECT_EQ(upb_Tokenizer_CurrentLine(t), 0);
-  EXPECT_EQ(upb_Tokenizer_CurrentColumn(t),
-            kSimpleTokenCases_case.input.size());
-  EXPECT_EQ(upb_Tokenizer_CurrentEndColumn(t),
-            kSimpleTokenCases_case.input.size());
-  EXPECT_TRUE(StringEquals(upb_Tokenizer_CurrentTextData(t), ""));
+  EXPECT_EQ(upb_Tokenizer_Type(t), kUpb_TokenType_End);
+  EXPECT_EQ(upb_Tokenizer_Line(t), 0);
+  EXPECT_EQ(upb_Tokenizer_Column(t), kSimpleTokenCases_case.input.size());
+  EXPECT_EQ(upb_Tokenizer_EndColumn(t), kSimpleTokenCases_case.input.size());
+  EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t), ""));
 
   // There should be no errors.
   EXPECT_TRUE(upb_String_Empty(&((TestErrorCollector*)error_collector)->text));
@@ -296,20 +293,20 @@ TEST_1D(TokenizerTest, FloatSuffix, kBlockSizes) {
   // Advance through tokens and check that they are parsed as expected.
   EXPECT_TRUE(upb_Tokenizer_Next(t));
-  EXPECT_EQ(upb_Tokenizer_CurrentType(t), kUpb_TokenType_Float);
-  EXPECT_TRUE(StringEquals(upb_Tokenizer_CurrentTextData(t), "1f"));
+  EXPECT_EQ(upb_Tokenizer_Type(t), kUpb_TokenType_Float);
+  EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t), "1f"));
 
   EXPECT_TRUE(upb_Tokenizer_Next(t));
-  EXPECT_EQ(upb_Tokenizer_CurrentType(t), kUpb_TokenType_Float);
-  EXPECT_TRUE(StringEquals(upb_Tokenizer_CurrentTextData(t), "2.5f"));
+  EXPECT_EQ(upb_Tokenizer_Type(t), kUpb_TokenType_Float);
+  EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t), "2.5f"));
 
   EXPECT_TRUE(upb_Tokenizer_Next(t));
-  EXPECT_EQ(upb_Tokenizer_CurrentType(t), kUpb_TokenType_Float);
-  EXPECT_TRUE(StringEquals(upb_Tokenizer_CurrentTextData(t), "6e3f"));
+  EXPECT_EQ(upb_Tokenizer_Type(t), kUpb_TokenType_Float);
+  EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t), "6e3f"));
 
   EXPECT_TRUE(upb_Tokenizer_Next(t));
-  EXPECT_EQ(upb_Tokenizer_CurrentType(t), kUpb_TokenType_Float);
-  EXPECT_TRUE(StringEquals(upb_Tokenizer_CurrentTextData(t), "7F"));
+  EXPECT_EQ(upb_Tokenizer_Type(t), kUpb_TokenType_Float);
+  EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t), "7F"));
 
   // There should be no more input.
   EXPECT_FALSE(upb_Tokenizer_Next(t));
@@ -351,8 +348,8 @@ TEST_2D(TokenizerTest, Whitespace, kWhitespaceTokenCases, kBlockSizes) {
     EXPECT_TRUE(upb_Tokenizer_Next(t));
-    EXPECT_EQ(upb_Tokenizer_CurrentType(t), kWhitespaceTokenCases_case.type);
-    EXPECT_TRUE(StringEquals(upb_Tokenizer_CurrentTextData(t),
+    EXPECT_EQ(upb_Tokenizer_Type(t), kWhitespaceTokenCases_case.type);
+    EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t),
                              kWhitespaceTokenCases_case.input.data()));
 
     EXPECT_FALSE(upb_Tokenizer_Next(t));
   }
@@ -483,11 +480,11 @@ TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) {
   auto t = upb_Tokenizer_New(NULL, 0, input, error_collector, 0, arena.ptr());
 
   // Before Next() is called, the initial token should always be TYPE_START.
-  EXPECT_EQ(upb_Tokenizer_CurrentType(t), kUpb_TokenType_Start);
-  EXPECT_EQ(upb_Tokenizer_CurrentLine(t), 0);
-  EXPECT_EQ(upb_Tokenizer_CurrentColumn(t), 0);
-  EXPECT_EQ(upb_Tokenizer_CurrentEndColumn(t), 0);
-  EXPECT_TRUE(StringEquals(upb_Tokenizer_CurrentTextData(t), ""));
+  EXPECT_EQ(upb_Tokenizer_Type(t), kUpb_TokenType_Start);
+  EXPECT_EQ(upb_Tokenizer_Line(t), 0);
+  EXPECT_EQ(upb_Tokenizer_Column(t), 0);
+  EXPECT_EQ(upb_Tokenizer_EndColumn(t), 0);
+  EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t), ""));
 
   // Loop through all expected tokens.
   TokenFields token_fields;
@@ -498,11 +495,6 @@ TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) {
     SCOPED_TRACE(testing::Message()
                  << "Token #" << i << ": " << absl::CEscape(token_fields.text));
 
-    const upb_TokenType old_type = upb_Tokenizer_CurrentType(t);
-    const int old_line = upb_Tokenizer_CurrentLine(t);
-    const int old_column = upb_Tokenizer_CurrentColumn(t);
-    const int old_end_column = upb_Tokenizer_CurrentEndColumn(t);
-
     // Next() should only return false when it hits the end token.
     if (token_fields.type == kUpb_TokenType_End) {
       EXPECT_FALSE(upb_Tokenizer_Next(t));
@@ -510,20 +502,14 @@ TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) {
       EXPECT_TRUE(upb_Tokenizer_Next(t));
     }
 
-    // Check that the previous token is set correctly.
-    EXPECT_EQ(upb_Tokenizer_PreviousType(t), old_type);
-    EXPECT_EQ(upb_Tokenizer_PreviousLine(t), old_line);
-    EXPECT_EQ(upb_Tokenizer_PreviousColumn(t), old_column);
-    EXPECT_EQ(upb_Tokenizer_PreviousEndColumn(t), old_end_column);
-
     // Check that the token matches the expected one.
-    EXPECT_EQ(upb_Tokenizer_CurrentType(t), token_fields.type);
-    EXPECT_EQ(upb_Tokenizer_CurrentLine(t), token_fields.line);
-    EXPECT_EQ(upb_Tokenizer_CurrentColumn(t), token_fields.column);
-    EXPECT_EQ(upb_Tokenizer_CurrentEndColumn(t), token_fields.end_column);
-    EXPECT_EQ(upb_Tokenizer_CurrentTextSize(t), token_fields.text.size());
-    EXPECT_TRUE(StringEquals(upb_Tokenizer_CurrentTextData(t),
-                             token_fields.text.data()));
+    EXPECT_EQ(upb_Tokenizer_Type(t), token_fields.type);
+    EXPECT_EQ(upb_Tokenizer_Line(t), token_fields.line);
+    EXPECT_EQ(upb_Tokenizer_Column(t), token_fields.column);
+    EXPECT_EQ(upb_Tokenizer_EndColumn(t), token_fields.end_column);
+    EXPECT_EQ(upb_Tokenizer_TextSize(t), token_fields.text.size());
+    EXPECT_TRUE(
+        StringEquals(upb_Tokenizer_TextData(t), token_fields.text.data()));
   } while (token_fields.type != kUpb_TokenType_End);
 
   // There should be no errors.
@@ -563,11 +549,11 @@ TEST_2D(TokenizerTest, MultipleWhitespaceTokens, kMultiWhitespaceTokenCases,
       upb_Tokenizer_New(NULL, 0, input, error_collector, options, arena.ptr());
 
   // Before Next() is called, the initial token should always be TYPE_START.
-  EXPECT_EQ(upb_Tokenizer_CurrentType(t), kUpb_TokenType_Start);
-  EXPECT_EQ(upb_Tokenizer_CurrentLine(t), 0);
-  EXPECT_EQ(upb_Tokenizer_CurrentColumn(t), 0);
-  EXPECT_EQ(upb_Tokenizer_CurrentEndColumn(t), 0);
-  EXPECT_TRUE(StringEquals(upb_Tokenizer_CurrentTextData(t), ""));
+  EXPECT_EQ(upb_Tokenizer_Type(t), kUpb_TokenType_Start);
+  EXPECT_EQ(upb_Tokenizer_Line(t), 0);
+  EXPECT_EQ(upb_Tokenizer_Column(t), 0);
+  EXPECT_EQ(upb_Tokenizer_EndColumn(t), 0);
+  EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t), ""));
 
   // Loop through all expected tokens.
   TokenFields token_fields;
@@ -578,11 +564,6 @@ TEST_2D(TokenizerTest, MultipleWhitespaceTokens, kMultiWhitespaceTokenCases,
     SCOPED_TRACE(testing::Message()
                  << "Token #" << i << ": " << token_fields.text);
 
-    const upb_TokenType old_type = upb_Tokenizer_CurrentType(t);
-    const int old_line = upb_Tokenizer_CurrentLine(t);
-    const int old_column = upb_Tokenizer_CurrentColumn(t);
-    const int old_end_column = upb_Tokenizer_CurrentEndColumn(t);
-
     // Next() should only return false when it hits the end token.
     if (token_fields.type != kUpb_TokenType_End) {
      EXPECT_TRUE(upb_Tokenizer_Next(t));
@@ -590,19 +571,13 @@ TEST_2D(TokenizerTest, MultipleWhitespaceTokens, kMultiWhitespaceTokenCases,
       EXPECT_FALSE(upb_Tokenizer_Next(t));
     }
 
-    // Check that the previous token is set correctly.
-    EXPECT_EQ(upb_Tokenizer_PreviousType(t), old_type);
-    EXPECT_EQ(upb_Tokenizer_PreviousLine(t), old_line);
-    EXPECT_EQ(upb_Tokenizer_PreviousColumn(t), old_column);
-    EXPECT_EQ(upb_Tokenizer_PreviousEndColumn(t), old_end_column);
-
     // Check that the token matches the expected one.
-    EXPECT_EQ(upb_Tokenizer_CurrentType(t), token_fields.type);
-    EXPECT_EQ(upb_Tokenizer_CurrentLine(t), token_fields.line);
-    EXPECT_EQ(upb_Tokenizer_CurrentColumn(t), token_fields.column);
-    EXPECT_EQ(upb_Tokenizer_CurrentEndColumn(t), token_fields.end_column);
-    EXPECT_TRUE(StringEquals(upb_Tokenizer_CurrentTextData(t),
-                             token_fields.text.data()));
+    EXPECT_EQ(upb_Tokenizer_Type(t), token_fields.type);
+    EXPECT_EQ(upb_Tokenizer_Line(t), token_fields.line);
+    EXPECT_EQ(upb_Tokenizer_Column(t), token_fields.column);
+    EXPECT_EQ(upb_Tokenizer_EndColumn(t), token_fields.end_column);
+    EXPECT_TRUE(
+        StringEquals(upb_Tokenizer_TextData(t), token_fields.text.data()));
   } while (token_fields.type != kUpb_TokenType_End);
 
   // There should be no errors.
@@ -637,7 +612,7 @@ TEST_1D(TokenizerTest, ShCommentStyle, kBlockSizes) {
   // Advance through tokens and check that they are parsed as expected.
   for (int i = 0; i < arraysize(kTokens); i++) {
     EXPECT_TRUE(upb_Tokenizer_Next(t));
-    EXPECT_TRUE(StringEquals(upb_Tokenizer_CurrentTextData(t), kTokens[i]));
+    EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t), kTokens[i]));
   }
 
   // There should be no more input.
@@ -1189,7 +1164,7 @@ TEST_2D(TokenizerTest, Errors, kErrorCases, kBlockSizes) {
   // Ignore all input, except remember if the last token was "foo".
   bool last_was_foo = false;
   while (upb_Tokenizer_Next(t)) {
-    last_was_foo = StringEquals(upb_Tokenizer_CurrentTextData(t), "foo");
+    last_was_foo = StringEquals(upb_Tokenizer_TextData(t), "foo");
   }
 
   // Check that the errors match what was expected.
