These functions are not yet part of the upb build but this is a good chunk of work so let's snapshot it now. PiperOrigin-RevId: 467733791pull/13171/head
parent
0c6531378d
commit
6861966501
8 changed files with 3084 additions and 2 deletions
@ -0,0 +1,135 @@ |
||||
/*
|
||||
* Copyright (c) 2009-2021, Google LLC |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or without |
||||
* modification, are permitted provided that the following conditions are met: |
||||
* * Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* * Redistributions in binary form must reproduce the above copyright |
||||
* notice, this list of conditions and the following disclaimer in the |
||||
* documentation and/or other materials provided with the distribution. |
||||
* * Neither the name of Google LLC nor the |
||||
* names of its contributors may be used to endorse or promote products |
||||
* derived from this software without specific prior written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, |
||||
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
// An attempt to provide some of the C++ string functionality in C.
|
||||
// Function names generally match those of corresponding C++ string methods.
|
||||
// All buffers are copied so operations are relatively expensive.
|
||||
// Internal character strings are always NUL-terminated.
|
||||
// All bool functions return true on success, false on failure.
|
||||
|
||||
#ifndef UPB_IO_STRING_H_ |
||||
#define UPB_IO_STRING_H_ |
||||
|
||||
#include <string.h> |
||||
|
||||
#include "upb/arena.h" |
||||
|
||||
// Must be last.
|
||||
#include "upb/port_def.inc" |
||||
|
||||
#ifdef __cplusplus |
||||
extern "C" { |
||||
#endif |
||||
|
||||
// Do not directly access the fields of this struct - use the accessors only.
|
||||
// TODO(salo): Add a small (16 bytes, maybe?) internal buffer so we can avoid
|
||||
// hitting the arena for short strings.
|
||||
typedef struct { |
||||
size_t size_; |
||||
size_t capacity_; |
||||
char* data_; |
||||
upb_Arena* arena_; |
||||
} upb_String; |
||||
|
||||
// Initialize an already-allocted upb_String object.
|
||||
UPB_INLINE bool upb_String_Init(upb_String* s, upb_Arena* a) { |
||||
static const int kDefaultCapacity = 16; |
||||
|
||||
s->size_ = 0; |
||||
s->capacity_ = kDefaultCapacity; |
||||
s->data_ = (char*)upb_Arena_Malloc(a, kDefaultCapacity); |
||||
if (!s->data_) return false; |
||||
s->data_[0] = '\0'; |
||||
s->arena_ = a; |
||||
return true; |
||||
} |
||||
|
||||
// Resets |s| to the empty string. The existing buffer is kept as-is apart
// from its first byte, which becomes the terminator.
UPB_INLINE void upb_String_Clear(upb_String* s) {
  char* const buf = s->data_;
  buf[0] = '\0';
  s->size_ = 0;
}
||||
|
||||
// Returns a pointer to the string's internal character buffer.
UPB_INLINE char* upb_String_Data(const upb_String* s) {
  char* const buf = s->data_;
  return buf;
}
||||
|
||||
// Returns the number of characters stored in |s| (terminator not counted).
UPB_INLINE size_t upb_String_Size(const upb_String* s) {
  const size_t count = s->size_;
  return count;
}
||||
|
||||
// Returns true if |s| holds no characters.
UPB_INLINE bool upb_String_Empty(const upb_String* s) {
  return !s->size_;
}
||||
|
||||
// Removes up to |len| characters starting at offset |pos|, shifting the tail
// of the string (terminator included) down to close the gap.
//
// Does nothing if |pos| is at or beyond the end of the string. If fewer than
// |len| characters follow |pos|, everything from |pos| onward is removed, so
// any oversized |len| (up to and including the maximum size_t) means "erase
// to the end".
UPB_INLINE void upb_String_Erase(upb_String* s, size_t pos, size_t len) {
  if (pos >= s->size_) return;

  // Clamp against the remaining length rather than testing `pos + len`:
  // that sum can wrap around for a huge |len|, which would skip the clamp
  // and send memmove() far outside the buffer.
  const size_t remaining = s->size_ - pos;
  if (len > remaining) len = remaining;

  char* des = s->data_ + pos;
  const char* src = des + len;
  // The +1 carries the NUL terminator along with the surviving tail.
  memmove(des, src, remaining - len + 1);
  s->size_ -= len;
}
||||
|
||||
// Ensures the buffer can hold at least |size| characters plus the terminator.
//
// Returns true if the capacity is already sufficient or the buffer was grown.
// Returns false if growing failed; the string is then left exactly as it was
// (same buffer, same capacity), so it remains usable.
UPB_INLINE bool upb_String_Reserve(upb_String* s, size_t size) {
  if (s->capacity_ > size) return true;

  // One extra byte is needed for the terminator; refuse a request for which
  // `size + 1` would wrap around to zero.
  if (size == (size_t)-1) return false;
  const size_t new_cap = size + 1;

  // Keep the result in a temporary so a failed reallocation does not replace
  // the still-valid buffer pointer with NULL.
  char* grown =
      (char*)upb_Arena_Realloc(s->arena_, s->data_, s->capacity_, new_cap);
  if (!grown) return false;

  s->data_ = grown;
  s->capacity_ = new_cap;
  return true;
}
||||
|
||||
// Appends |size| bytes starting at |data| to the end of |s| and re-terminates
// the string.
//
// |data| may be NULL only when |size| is zero. Returns false, leaving |s|
// unchanged, if the combined length cannot be represented or the buffer
// cannot be grown.
UPB_INLINE bool upb_String_Append(upb_String* s, const char* data,
                                  size_t size) {
  const size_t max_size = (size_t)-1;

  // Reject lengths for which `size_ + size` plus the terminator would not fit
  // in a size_t; this also keeps the Reserve() request below from wrapping.
  if (size >= max_size - s->size_) return false;
  const size_t new_size = s->size_ + size;

  if (s->capacity_ <= new_size) {
    // Grow geometrically so repeated appends stay cheap, but fall back to an
    // exact fit when doubling would overflow.
    const size_t target =
        (new_size <= (max_size - 2) / 2) ? 2 * new_size + 1 : new_size;
    if (!upb_String_Reserve(s, target)) return false;
  }

  // memcpy() must not be passed a NULL pointer, even for a zero-byte copy.
  if (size != 0) memcpy(s->data_ + s->size_, data, size);
  s->size_ = new_size;
  s->data_[new_size] = '\0';
  return true;
}
||||
|
||||
// Replaces the contents of |s| with the |size| bytes starting at |data|.
// The string is emptied first and the new bytes are then appended, so the
// return value is that of the append.
UPB_INLINE bool upb_String_Assign(upb_String* s, const char* data,
                                  size_t size) {
  upb_String_Clear(s);
  const bool appended = upb_String_Append(s, data, size);
  return appended;
}
||||
|
||||
// Makes |des| hold a copy of the characters in |src|.
// Returns true on success, false if the copy could not be stored.
UPB_INLINE bool upb_String_Copy(upb_String* des, const upb_String* src) {
  // Copying a string onto itself is a no-op. Going through Assign() here
  // would clear |des| first, overwriting the first byte of the very buffer
  // that is about to be read, and then copy that buffer onto itself.
  if (des == src) return true;

  return upb_String_Assign(des, src->data_, src->size_);
}
||||
|
||||
// Appends the single character |ch| to the end of |s|.
// Returns the result of the underlying append.
UPB_INLINE bool upb_String_PushBack(upb_String* s, char ch) {
  const char one[1] = {ch};
  return upb_String_Append(s, one, 1);
}
||||
|
||||
#ifdef __cplusplus |
||||
} /* extern "C" */ |
||||
#endif |
||||
|
||||
#include "upb/port_undef.inc" |
||||
|
||||
#endif /* UPB_IO_STRING_H_ */ |
@ -0,0 +1,127 @@ |
||||
/*
|
||||
* Copyright (c) 2009-2022, Google LLC |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or without |
||||
* modification, are permitted provided that the following conditions are met: |
||||
* * Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* * Redistributions in binary form must reproduce the above copyright |
||||
* notice, this list of conditions and the following disclaimer in the |
||||
* documentation and/or other materials provided with the distribution. |
||||
* * Neither the name of Google LLC nor the |
||||
* names of its contributors may be used to endorse or promote products |
||||
* derived from this software without specific prior written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, |
||||
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
#include "upb/io/string.h" |
||||
|
||||
#include <string.h> |
||||
|
||||
#include "gtest/gtest.h" |
||||
#include "upb/upb.hpp" |
||||
|
||||
// Exercises upb_String_Assign() and upb_String_Append(), checking both the
// reported size and the NUL-terminated contents after every step.
TEST(StringTest, Append) {
  upb::Arena arena;

  upb_String foo;
  // Every later step dereferences the buffer, so stop here if init fails.
  ASSERT_TRUE(upb_String_Init(&foo, arena.ptr()));
  EXPECT_EQ(upb_String_Size(&foo), 0u);

  // Only the requested prefix of the source buffer is copied.
  EXPECT_TRUE(upb_String_Assign(&foo, "foobar", 3));
  EXPECT_EQ(upb_String_Size(&foo), 3u);
  EXPECT_STREQ(upb_String_Data(&foo), "foo");

  EXPECT_TRUE(upb_String_Append(&foo, "bar", 3));
  EXPECT_EQ(upb_String_Size(&foo), 6u);
  EXPECT_STREQ(upb_String_Data(&foo), "foobar");

  EXPECT_TRUE(upb_String_Append(&foo, "baz", 3));
  EXPECT_EQ(upb_String_Size(&foo), 9u);
  EXPECT_STREQ(upb_String_Data(&foo), "foobarbaz");

  EXPECT_TRUE(upb_String_Append(&foo, "bat", 3));
  EXPECT_EQ(upb_String_Size(&foo), 12u);
  EXPECT_STREQ(upb_String_Data(&foo), "foobarbazbat");

  EXPECT_TRUE(upb_String_Append(&foo, "feefiefoefoo", 12));
  EXPECT_EQ(upb_String_Size(&foo), 24u);
  EXPECT_STREQ(upb_String_Data(&foo), "foobarbazbatfeefiefoefoo");

  // Assigning replaces the accumulated contents entirely.
  const char* password = "fiddlesnarf";
  EXPECT_TRUE(upb_String_Assign(&foo, password, strlen(password)));
  EXPECT_EQ(upb_String_Size(&foo), strlen(password));
  EXPECT_STREQ(upb_String_Data(&foo), password);
}
||||
|
||||
// Exercises upb_String_Reserve() followed by a large append, then builds a
// second string one character at a time with upb_String_PushBack().
TEST(StringTest, PushBack) {
  upb::Arena arena;

  upb_String foo;
  // Every later step dereferences the buffer, so stop here if init fails.
  ASSERT_TRUE(upb_String_Init(&foo, arena.ptr()));
  EXPECT_EQ(upb_String_Size(&foo), 0u);

  const std::string big =
      "asfashfxauwhfwu4fuwafxasnfwxnxwunxuwxufhwfaiwj4w9jvwxssldfjlasviorwnvwij"
      "grsdjrfiasrjrasijgraisjvrvoiasjspjfsjgfasjgiasjidjsrvjsrjrasjfrijwjajsrF"
      "JWJGF4WWJSAVSLJArSJGFrAISJGASrlafjgrivarijrraisrgjiawrijg3874f87f7hqfhpf"
      "f8929hr32p8475902387459023475297328-22-3776-26";
  EXPECT_TRUE(upb_String_Reserve(&foo, big.size() + 1));
  EXPECT_TRUE(upb_String_Append(&foo, big.data(), big.size()));
  EXPECT_EQ(upb_String_Size(&foo), big.size());
  EXPECT_STREQ(upb_String_Data(&foo), big.c_str());

  upb_String bar;
  ASSERT_TRUE(upb_String_Init(&bar, arena.ptr()));
  EXPECT_EQ(upb_String_Size(&bar), 0u);

  EXPECT_TRUE(upb_String_PushBack(&bar, 'x'));
  EXPECT_TRUE(upb_String_PushBack(&bar, 'y'));
  EXPECT_TRUE(upb_String_PushBack(&bar, 'z'));
  EXPECT_TRUE(upb_String_PushBack(&bar, 'z'));
  EXPECT_TRUE(upb_String_PushBack(&bar, 'y'));
  EXPECT_EQ(upb_String_Size(&bar), 5u);
  EXPECT_STREQ(upb_String_Data(&bar), "xyzzy");
}
||||
|
||||
// Exercises upb_String_Erase() with ranges in the middle of the string, a
// length that runs past the end, and a range covering the whole string.
TEST(StringTest, Erase) {
  upb::Arena arena;

  upb_String foo;
  // Every later step dereferences the buffer, so stop here if init fails.
  ASSERT_TRUE(upb_String_Init(&foo, arena.ptr()));

  const char* sent = "This is an example sentence.";
  EXPECT_TRUE(upb_String_Assign(&foo, sent, strlen(sent)));
  EXPECT_EQ(upb_String_Size(&foo), 28u);

  upb_String_Erase(&foo, 10, 8);
  EXPECT_EQ(upb_String_Size(&foo), 20u);
  EXPECT_STREQ(upb_String_Data(&foo), "This is an sentence.");

  upb_String_Erase(&foo, 9, 1);
  EXPECT_EQ(upb_String_Size(&foo), 19u);
  EXPECT_STREQ(upb_String_Data(&foo), "This is a sentence.");

  upb_String_Erase(&foo, 5, 5);
  EXPECT_EQ(upb_String_Size(&foo), 14u);
  EXPECT_STREQ(upb_String_Data(&foo), "This sentence.");

  // A length larger than what remains erases through the end of the string.
  upb_String_Erase(&foo, 4, 99);
  EXPECT_EQ(upb_String_Size(&foo), 4u);
  EXPECT_STREQ(upb_String_Data(&foo), "This");

  upb_String_Erase(&foo, 0, 4);
  EXPECT_EQ(upb_String_Size(&foo), 0u);
  EXPECT_STREQ(upb_String_Data(&foo), "");
}
@ -0,0 +1,98 @@ |
||||
/*
|
||||
* Copyright (c) 2009-2022, Google LLC |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or without |
||||
* modification, are permitted provided that the following conditions are met: |
||||
* * Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* * Redistributions in binary form must reproduce the above copyright |
||||
* notice, this list of conditions and the following disclaimer in the |
||||
* documentation and/or other materials provided with the distribution. |
||||
* * Neither the name of Google LLC nor the |
||||
* names of its contributors may be used to endorse or promote products |
||||
* derived from this software without specific prior written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, |
||||
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
#include "upb/io/strtod.h" |
||||
|
||||
#include <stdio.h> |
||||
#include <stdlib.h> |
||||
#include <string.h> |
||||
|
||||
// Must be last.
|
||||
#include "upb/port_def.inc" |
||||
|
||||
// Determine the locale-specific radix character by calling sprintf() to print
|
||||
// the number 1.5, then stripping off the digits. As far as I can tell, this
|
||||
// is the only portable, thread-safe way to get the C library to divulge the
|
||||
// locale's radix character. No, localeconv() is NOT thread-safe.
|
||||
|
||||
// Writes the current locale's radix (decimal point) string, NUL-terminated,
// into |data|, which has room for |capacity| bytes. Returns the radix length
// in bytes, not counting the terminator.
//
// The radix is discovered by formatting 1.5 and stripping the digits. If the
// formatted text does not have the expected "1<radix>5" shape, or the radix
// does not fit in |data|, "." is used instead (or an empty string if |data|
// cannot even hold that). These checks are made at run time so they still
// protect the copy in builds where assertions are compiled out.
static int GetLocaleRadix(char *data, size_t capacity) {
  char temp[16];
  const int size = snprintf(temp, sizeof(temp), "%.1f", 1.5);

  if (size >= 3 && (size_t)size < sizeof(temp) && temp[0] == '1' &&
      temp[size - 1] == '5' && (size_t)(size - 2) < capacity) {
    const int radix_len = size - 2;
    memcpy(data, temp + 1, (size_t)radix_len);
    data[radix_len] = '\0';
    return radix_len;
  }

  // Unexpected output: fall back to the "C" locale radix.
  if (capacity >= 2) {
    data[0] = '.';
    data[1] = '\0';
    return 1;
  }
  if (capacity >= 1) data[0] = '\0';
  return 0;
}

// Populates |output| with a string identical to |input| except that the
// single character pointed to by |pos| (which should be '.') is replaced by
// the |radix_len| bytes at |radix|.
//
// |output| must have room for strlen(input) - 1 + radix_len + 1 bytes.
static void LocalizeRadix(const char *input, const char *pos,
                          const char *radix, size_t radix_len, char *output) {
  const size_t prefix_len = (size_t)(pos - input);
  const char *suffix = pos + 1;

  memcpy(output, input, prefix_len);
  memcpy(output + prefix_len, radix, radix_len);
  // Copy the remainder together with its terminator.
  memcpy(output + prefix_len + radix_len, suffix, strlen(suffix) + 1);
}

// Parses a floating-point number from |str| the way strtod() does, but always
// accepting '.' as the radix character regardless of the current locale.
//
// If |endptr| is not NULL, *endptr is set to the first character of |str|
// that was not consumed. Returns the parsed value.
double NoLocaleStrtod(const char *str, char **endptr) {
  // We cannot simply set the locale to "C" temporarily with setlocale()
  // as this is not thread-safe.  Instead, we try to parse in the current
  // locale first.  If parsing stops at a '.' character, then this is a
  // pretty good hint that we're actually in some other locale in which
  // '.' is not the radix character.

  char *temp_endptr;
  double result = strtod(str, &temp_endptr);
  if (endptr != NULL) *endptr = temp_endptr;
  if (*temp_endptr != '.') return result;

  // Parsing halted on a '.'.  Perhaps we're in a different locale?  Let's
  // try to replace the '.' with a locale-specific radix character and
  // try again.

  char radix[8];
  const size_t radix_len = (size_t)GetLocaleRadix(radix, sizeof(radix));

  // Size the scratch copy from the actual input. Short inputs use the stack
  // buffer; longer ones get a heap buffer so that the copy can never run
  // past the end of a fixed-size array.
  const size_t needed = strlen(str) - 1 + radix_len + 1;
  char stack_buf[80];
  char *localized = stack_buf;
  if (needed > sizeof(stack_buf)) {
    localized = (char *)malloc(needed);
    // Out of memory: keep the result of the first parse.
    if (localized == NULL) return result;
  }
  LocalizeRadix(str, temp_endptr, radix, radix_len, localized);

  char *localized_endptr;
  result = strtod(localized, &localized_endptr);
  if ((localized_endptr - localized) > (temp_endptr - str)) {
    // This attempt got further, so replacing the decimal must have helped.
    // Update endptr to point at the right location.
    if (endptr != NULL) {
      // size_diff is non-zero if the localized radix has multiple bytes.
      const int size_diff = (int)radix_len - 1;
      *endptr = (char *)str + (localized_endptr - localized - size_diff);
    }
  }

  if (localized != stack_buf) free(localized);
  return result;
}
@ -0,0 +1,46 @@ |
||||
/*
|
||||
* Copyright (c) 2009-2022, Google LLC |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or without |
||||
* modification, are permitted provided that the following conditions are met: |
||||
* * Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* * Redistributions in binary form must reproduce the above copyright |
||||
* notice, this list of conditions and the following disclaimer in the |
||||
* documentation and/or other materials provided with the distribution. |
||||
* * Neither the name of Google LLC nor the |
||||
* names of its contributors may be used to endorse or promote products |
||||
* derived from this software without specific prior written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, |
||||
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
#ifndef UPB_IO_STRTOD_H_ |
||||
#define UPB_IO_STRTOD_H_ |
||||
|
||||
// Must be last.
|
||||
#include "upb/port_def.inc" |
||||
|
||||
#ifdef __cplusplus |
||||
extern "C" { |
||||
#endif |
||||
|
||||
double NoLocaleStrtod(const char *str, char **endptr); |
||||
|
||||
#ifdef __cplusplus |
||||
} /* extern "C" */ |
||||
#endif |
||||
|
||||
#include "upb/port_undef.inc" |
||||
|
||||
#endif /* UPB_IO_STRTOD_H_ */ |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,187 @@ |
||||
/*
|
||||
* Copyright (c) 2009-2022, Google LLC |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or without |
||||
* modification, are permitted provided that the following conditions are met: |
||||
* * Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* * Redistributions in binary form must reproduce the above copyright |
||||
* notice, this list of conditions and the following disclaimer in the |
||||
* documentation and/or other materials provided with the distribution. |
||||
* * Neither the name of Google LLC nor the |
||||
* names of its contributors may be used to endorse or promote products |
||||
* derived from this software without specific prior written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, |
||||
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
// Tokenizer for parsing text read from a upb_ZeroCopyInputStream.
|
||||
|
||||
#ifndef UPB_IO_TOKENIZER_H_ |
||||
#define UPB_IO_TOKENIZER_H_ |
||||
|
||||
#include "upb/io/string.h" |
||||
#include "upb/io/zero_copy_input_stream.h" |
||||
#include "upb/upb.h" |
||||
|
||||
// Must be included last.
|
||||
#include "upb/port_def.inc" |
||||
|
||||
#ifdef __cplusplus |
||||
extern "C" { |
||||
#endif |
||||
|
||||
typedef enum {
  // Initial state: upb_Tokenizer_Next() has not yet been called.
  kUpb_TokenType_Start = 0,

  // End of input reached. The token text is empty.
  kUpb_TokenType_End = 1,

  // A run of letters, digits, and underscores that does not begin with a
  // digit. A number directly followed by an identifier, with no space in
  // between, is an error.
  kUpb_TokenType_Identifier = 2,

  // A run of digits forming an integer. Digits are decimal by default; a "0x"
  // prefix marks a hex number and a leading zero marks octal, as with C
  // numeric literals. A leading minus sign is NOT part of the token; the
  // parser is expected to handle unary minus itself.
  kUpb_TokenType_Integer = 3,

  // A floating point literal with a fractional part and/or an exponent.
  // Always decimal and, like integers, never negative.
  kUpb_TokenType_Float = 4,

  // A quoted sequence of escaped characters. Single or double quotes may be
  // used, but the opening and closing quote must match. A string literal
  // cannot cross a line break.
  kUpb_TokenType_String = 5,

  // Any other printable character, such as '!' or '+'. A symbol is always
  // exactly one character, so "!+$%" is four tokens.
  kUpb_TokenType_Symbol = 6,

  // A run of whitespace. Only produced when whitespace reporting is enabled
  // (see kUpb_TokenizerOption_ReportWhitespace). Not reported for whitespace
  // inside comments or strings.
  kUpb_TokenType_Whitespace = 7,

  // A newline ('\n'). Only produced when both whitespace reporting and
  // newline reporting are enabled (see kUpb_TokenizerOption_ReportNewlines).
  // Not reported for newlines inside comments or strings.
  kUpb_TokenType_Newline = 8,
} upb_TokenType;
||||
|
||||
// Bit flags; each option occupies its own bit.
typedef enum {
  // Allow floats to carry an 'f' suffix. A token that would otherwise be an
  // integer but has the 'f' suffix is forced to be interpreted as a float.
  // For all other purposes the 'f' is ignored.
  kUpb_TokenizerOption_AllowFAfterFloat = 0x01,

  // Allow string literals to span multiple lines.
  // Do not use this; for Google-internal cleanup only.
  kUpb_TokenizerOption_AllowMultilineStrings = 0x02,

  // Allow a field name to appear immediately after a number, with no
  // intervening whitespace required as a delimiter.
  // Do not use this; for Google-internal cleanup only.
  kUpb_TokenizerOption_AllowFieldImmediatelyAfterNumber = 0x04,

  // Report whitespace tokens from Next().
  kUpb_TokenizerOption_ReportWhitespace = 0x08,

  // Report newline tokens from Next(). Implies ReportWhitespace.
  kUpb_TokenizerOption_ReportNewlines = 0x10,

  // The tokenizer expects C-style (/* */) comments by default; with this
  // option set it expects shell-style (#) comments instead.
  kUpb_TokenizerOption_CommentStyleShell = 0x20,
} upb_Tokenizer_Option;
||||
|
||||
// Abstract interface for an object which collects the errors that occur
|
||||
// during parsing. A typical implementation might simply print the errors
|
||||
// to stdout.
|
||||
typedef struct {
  // Called to report an error in the input at the given line and column.
  // Both numbers are zero-based, so callers may want to add 1 to each before
  // printing them.
  void (*AddError)(int line, int column, const char* message, void* context);

  // Called to report a warning in the input at the given line and column.
  // Both numbers are zero-based, so callers may want to add 1 to each before
  // printing them.
  void (*AddWarning)(int line, int column, const char* message, void* context);

  // Opaque pointer, passed as an argument to the above functions.
  void* context;
} upb_ErrorCollector;
||||
|
||||
typedef struct upb_Tokenizer upb_Tokenizer; |
||||
|
||||
// Can be passed a flat array and/or a ZCIS as input.
|
||||
// The array will be read first (if non-NULL), then the stream (if non-NULL).
|
||||
upb_Tokenizer* upb_Tokenizer_New(const void* data, size_t size, |
||||
upb_ZeroCopyInputStream* input, |
||||
upb_ErrorCollector* error_collector, |
||||
int options, upb_Arena* arena); |
||||
|
||||
void upb_Tokenizer_Fini(upb_Tokenizer* t); |
||||
bool upb_Tokenizer_Next(upb_Tokenizer* t); |
||||
|
||||
// Accessors for inspecting current/previous parse tokens,
|
||||
// which are opaque to the tokenizer (to reduce copying).
|
||||
|
||||
upb_TokenType upb_Tokenizer_CurrentType(const upb_Tokenizer* t); |
||||
int upb_Tokenizer_CurrentColumn(const upb_Tokenizer* t); |
||||
int upb_Tokenizer_CurrentEndColumn(const upb_Tokenizer* t); |
||||
int upb_Tokenizer_CurrentLine(const upb_Tokenizer* t); |
||||
int upb_Tokenizer_CurrentTextSize(const upb_Tokenizer* t); |
||||
const char* upb_Tokenizer_CurrentTextData(const upb_Tokenizer* t); |
||||
|
||||
upb_TokenType upb_Tokenizer_PreviousType(const upb_Tokenizer* t); |
||||
int upb_Tokenizer_PreviousColumn(const upb_Tokenizer* t); |
||||
int upb_Tokenizer_PreviousEndColumn(const upb_Tokenizer* t); |
||||
int upb_Tokenizer_PreviousLine(const upb_Tokenizer* t); |
||||
|
||||
// Parses the text of a kUpb_TokenType_Integer token. Returns false if the
// result would be greater than max_value. Otherwise, returns true and sets
// *output to the result. If the text did not originally come from a
// kUpb_TokenType_Integer token produced by the tokenizer, the result is
// undefined (possibly an assert failure).
|
||||
bool upb_Parse_Integer(const char* text, uint64_t max_value, uint64_t* output); |
||||
|
||||
// Parses the text of a kUpb_TokenType_Float token. This never fails, so long
// as the text actually comes from a kUpb_TokenType_Float token produced by
// the tokenizer. If it doesn't, the result is undefined (possibly an assert
// failure).
|
||||
double upb_Parse_Float(const char* text); |
||||
|
||||
// Identical to upb_Parse_String() (below), but appends to output.
|
||||
void upb_Parse_StringAppend(const char* text, upb_String* output); |
||||
|
||||
// Parses the text of a kUpb_TokenType_String token. This never fails, so long
// as the text actually comes from a kUpb_TokenType_String token produced by
// the tokenizer. If it doesn't, the result is undefined (possibly an assert
// failure).
|
||||
UPB_INLINE void upb_Parse_String(const char* text, upb_String* output) {
  // Empty |output| first so that the parsed text replaces, rather than
  // extends, whatever it held before.
  upb_String_Clear(output);

  // The appending variant does the actual parsing.
  upb_Parse_StringAppend(text, output);
}
||||
|
||||
// External helper: validate an identifier.
|
||||
bool upb_Tokenizer_IsIdentifier(const char* text, int size); |
||||
|
||||
#ifdef __cplusplus |
||||
} /* extern "C" */ |
||||
#endif |
||||
|
||||
#include "upb/port_undef.inc" |
||||
|
||||
#endif // UPB_IO_TOKENIZER_H_
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue