|
|
|
//
|
|
|
|
// Copyright 2017 The Abseil Authors.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// https://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
//
|
|
|
|
// -----------------------------------------------------------------------------
|
|
|
|
// File: ascii.h
|
|
|
|
// -----------------------------------------------------------------------------
|
|
|
|
//
|
|
|
|
// This package contains functions operating on characters and strings
|
|
|
|
// restricted to standard ASCII. These include character classification
|
|
|
|
// functions analogous to those found in the ANSI C Standard Library <ctype.h>
|
|
|
|
// header file.
|
|
|
|
//
|
|
|
|
// C++ implementations provide <ctype.h> functionality based on their
|
|
|
|
// C environment locale. In general, reliance on such a locale is not ideal, as
|
|
|
|
// the locale standard is problematic (and may not return invariant information
|
|
|
|
// for the same character set, for example). These `ascii_*()` functions are
|
|
|
|
// hard-wired for standard ASCII, much faster, and guaranteed to behave
|
|
|
|
// consistently. They will never be overloaded, nor will their function
|
|
|
|
// signature change.
|
|
|
|
//
|
|
|
|
// `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`,
|
|
|
|
// `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`,
|
|
|
|
// `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`,
|
|
|
|
// `ascii_isxdigit()`
|
|
|
|
// Analogous to the <ctype.h> functions with similar names, these
|
|
|
|
// functions take an unsigned char and return a bool, based on whether the
|
|
|
|
// character matches the condition specified.
|
|
|
|
//
|
|
|
|
// If the input character has a numerical value greater than 127, these
|
|
|
|
// functions return `false`.
|
|
|
|
//
|
|
|
|
// `ascii_tolower()`, `ascii_toupper()`
|
|
|
|
// Analogous to the <ctype.h> functions with similar names, these functions
|
|
|
|
// take an unsigned char and return a char.
|
|
|
|
//
|
|
|
|
// If the input character is not an ASCII {lower,upper}-case letter (including
|
|
|
|
// numerical values greater than 127) then the functions return the same value
|
|
|
|
// as the input character.
|
|
|
|
|
|
|
|
#ifndef ABSL_STRINGS_ASCII_H_
|
|
|
|
#define ABSL_STRINGS_ASCII_H_
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
#include <string>
|
|
|
|
|
|
|
|
#include "absl/base/attributes.h"
|
|
|
|
#include "absl/base/config.h"
|
|
|
|
#include "absl/strings/string_view.h"
|
|
|
|
|
|
|
|
namespace absl {
|
|
|
|
ABSL_NAMESPACE_BEGIN
|
|
|
|
namespace ascii_internal {
|
|
|
|
|
|
|
|
// Declaration for an array of bitfields holding character information.
|
|
|
|
ABSL_DLL extern const unsigned char kPropertyBits[256];
|
|
|
|
|
|
|
|
// Declaration for the array of characters to upper-case characters.
|
|
|
|
ABSL_DLL extern const char kToUpper[256];
|
|
|
|
|
|
|
|
// Declaration for the array of characters to lower-case characters.
|
|
|
|
ABSL_DLL extern const char kToLower[256];
|
|
|
|
|
|
|
|
} // namespace ascii_internal
|
|
|
|
|
|
|
|
// ascii_isalpha()
|
|
|
|
//
|
|
|
|
// Determines whether the given character is an alphabetic character.
|
|
|
|
inline bool ascii_isalpha(unsigned char c) {
|
|
|
|
return (ascii_internal::kPropertyBits[c] & 0x01) != 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// ascii_isalnum()
|
|
|
|
//
|
|
|
|
// Determines whether the given character is an alphanumeric character.
|
|
|
|
inline bool ascii_isalnum(unsigned char c) {
|
|
|
|
return (ascii_internal::kPropertyBits[c] & 0x04) != 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// ascii_isspace()
|
|
|
|
//
|
|
|
|
// Determines whether the given character is a whitespace character (space,
|
|
|
|
// tab, vertical tab, formfeed, linefeed, or carriage return).
|
|
|
|
inline bool ascii_isspace(unsigned char c) {
|
|
|
|
return (ascii_internal::kPropertyBits[c] & 0x08) != 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// ascii_ispunct()
|
|
|
|
//
|
|
|
|
// Determines whether the given character is a punctuation character.
|
|
|
|
inline bool ascii_ispunct(unsigned char c) {
|
|
|
|
return (ascii_internal::kPropertyBits[c] & 0x10) != 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// ascii_isblank()
|
|
|
|
//
|
|
|
|
// Determines whether the given character is a blank character (tab or space).
|
|
|
|
inline bool ascii_isblank(unsigned char c) {
|
|
|
|
return (ascii_internal::kPropertyBits[c] & 0x20) != 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// ascii_iscntrl()
|
|
|
|
//
|
|
|
|
// Determines whether the given character is a control character.
|
|
|
|
inline bool ascii_iscntrl(unsigned char c) {
|
|
|
|
return (ascii_internal::kPropertyBits[c] & 0x40) != 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// ascii_isxdigit()
|
|
|
|
//
|
|
|
|
// Determines whether the given character can be represented as a hexadecimal
|
|
|
|
// digit character (i.e. {0-9}, {A-F}, or {a-f}).
|
|
|
|
inline bool ascii_isxdigit(unsigned char c) {
|
|
|
|
return (ascii_internal::kPropertyBits[c] & 0x80) != 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// ascii_isdigit()
|
|
|
|
//
|
|
|
|
// Determines whether the given character can be represented as a decimal
|
|
|
|
// digit character (i.e. {0-9}).
|
|
|
|
inline bool ascii_isdigit(unsigned char c) {
  // The decimal digits form one contiguous run, so a bounds check suffices.
  if (c < '0') return false;
  return c <= '9';
}
|
|
|
|
|
|
|
|
// ascii_isprint()
|
|
|
|
//
|
Export of internal Abseil changes
--
07240ca7822d007cdcc79f2c40bd58b2c2010348 by Abseil Team <absl-team@google.com>:
Correct the comment from "AlphaNum" to "Arg".
PiperOrigin-RevId: 416139192
--
adcba4a6b3763626e1db7b1e8c108b3114903557 by Martijn Vels <mvels@google.com>:
Fix NewExternalRep() to require data being non-empty, and remove nullptr return.
PiperOrigin-RevId: 416135865
--
c0d14cd918fb16f15d1d84de9284b5c5ecc1f8f2 by Abseil Team <absl-team@google.com>:
Fix doc comment for absl::ascii_isprint().
The comment was incorrectly saying that it includes all whitespace.
It doesn't; the only whitespace char it includes is ' '.
PiperOrigin-RevId: 416112524
--
d83327800159c07002b6865e21232a12463e02dd by Abseil Team <absl-team@google.com>:
Internal change
PiperOrigin-RevId: 416099978
--
baf11e9ca42ca9140cdbf8075f971db8d65b1195 by Ilya Tokar <tokarip@google.com>:
Prevent compiler from optimizing Group_Match* benchmarks away.
Currently we benchmark single store of precomputed value.
Not all affected benchmarks show performance changes:
BM_Group_Match 0.53ns ± 1% 0.53ns ± 0% -0.42% (p=0.038 n=10+10)
BM_Group_MatchEmpty 0.26ns ± 1% 0.26ns ± 1% ~ (p=1.000 n=10+10)
BM_Group_MatchEmptyOrDeleted 0.26ns ± 1% 0.26ns ± 1% ~ (p=0.121 n=10+10)
BM_Group_CountLeadingEmptyOrDeleted 0.26ns ± 1% 0.45ns ± 0% +70.05% (p=0.000 n=10+8)
BM_Group_MatchFirstEmptyOrDeleted 0.26ns ± 0% 0.44ns ± 1% +65.91% (p=0.000 n=8+9)
But inspecting the generated code shows the difference,
e. g. BM_Group_MatchFirstEmptyOrDeleted
Before:
add $0xffffffffffffffff,%rbx
jne 30
After:
pcmpeqd %xmm0,%xmm0
pcmpgtb -0x30(%rbp),%xmm0
pmovmskb %xmm0,%eax
add $0xffffffffffffffff,%rbx
jne 40
PiperOrigin-RevId: 416083515
--
122fbff893dc4571b3e75e4b241eb4495b925610 by Abseil Team <absl-team@google.com>:
Put namespace guard in ABSL_DECLARE_FLAG to make declaring a flag in a namespace a compiler error instead of a linker error.
PiperOrigin-RevId: 416036072
--
020fd8a20f5fa319e948846e003391fcb9e03868 by Ilya Tokar <tokarip@google.com>:
Make Cord::InlineRep::set_data unconditionally zero out memory.
Currently there is a single case where we don't zero out memory
as an optimization. Unconditional zeroing doesn't show any changes
in benchmarks, except for the unrelated improvement:
BM_CordPartialCopyToCord/1M/1 12.6ns ± 4% 12.6ns ± 4% ~ (p=0.857 n=16+19)
BM_CordPartialCopyToCord/1M/128 44.9ns ± 7% 45.0ns ± 3% ~ (p=0.468 n=18+17)
BM_CordPartialCopyToCord/1M/1k 64.5ns ± 4% 61.4ns ± 4% -4.82% (p=0.000 n=19+17)
BM_CordPartialCopyToCord/1M/8k 139ns ± 3% 128ns ±15% -7.76% (p=0.009 n=17+20)
BM_CordPartialCopyToCord/1M/16k 193ns ± 6% 168ns ± 6% -13.17% (p=0.000 n=17+17)
BM_CordPartialCopyToCord/4M/16k 199ns ± 4% 177ns ± 4% -11.36% (p=0.000 n=17+18)
BM_CordPartialCopyToCord/4M/32k 275ns ± 3% 250ns ± 4% -9.00% (p=0.000 n=18+18)
BM_CordPartialCopyToCord/4M/64k 291ns ± 4% 266ns ± 5% -8.53% (p=0.000 n=18+16)
BM_CordPartialCopyToCord/4M/128k 322ns ± 5% 291ns ± 4% -9.43% (p=0.000 n=20+18)
BM_CordPartialCopyToCord/8M/32k 281ns ± 5% 251ns ± 4% -10.38% (p=0.000 n=20+16)
BM_CordPartialCopyToCord/8M/64k 293ns ± 6% 267ns ± 4% -8.87% (p=0.000 n=16+19)
BM_CordPartialCopyToCord/8M/128k 334ns ± 3% 305ns ± 2% -8.56% (p=0.000 n=17+16)
This is clearly an alignment effect, since the number of executed instructions is the same:
BM_CordPartialCopyToCord/1M/1 155 ± 0% 155 ± 0% ~ (all samples are equal)
BM_CordPartialCopyToCord/1M/128 446 ± 0% 446 ± 0% ~ (p=0.332 n=36+39)
BM_CordPartialCopyToCord/1M/1k 473 ± 0% 473 ± 0% ~ (p=0.969 n=40+40)
BM_CordPartialCopyToCord/1M/8k 808 ± 0% 808 ± 0% ~ (p=0.127 n=40+39)
BM_CordPartialCopyToCord/1M/16k 957 ± 0% 957 ± 0% ~ (p=0.532 n=40+40)
BM_CordPartialCopyToCord/4M/16k 952 ± 0% 952 ± 0% ~ (p=0.686 n=39+39)
BM_CordPartialCopyToCord/4M/32k 1.12k ± 0% 1.12k ± 0% ~ (p=0.690 n=40+40)
BM_CordPartialCopyToCord/4M/64k 1.23k ± 0% 1.23k ± 0% ~ (p=0.182 n=40+39)
BM_CordPartialCopyToCord/4M/128k 1.44k ± 0% 1.44k ± 0% ~ (p=0.711 n=40+40)
BM_CordPartialCopyToCord/8M/32k 1.12k ± 0% 1.12k ± 0% ~ (p=0.697 n=40+40)
BM_CordPartialCopyToCord/8M/64k 1.23k ± 0% 1.23k ± 0% +0.00% (p=0.049 n=40+40)
BM_CordPartialCopyToCord/8M/128k 1.44k ± 0% 1.44k ± 0% ~ (p=0.507 n=40+40)
This makes code simpler and doesn't regress performance.
PiperOrigin-RevId: 415560574
--
37305b2690b31682088749e4d62f40d7095bdc54 by Derek Mauro <dmauro@google.com>:
Internal change
PiperOrigin-RevId: 415558737
--
86aaed569b9e743c1eb813a5f48def978a793db3 by Martijn Vels <mvels@google.com>:
Internal change
PiperOrigin-RevId: 415515201
--
6cdb8786cdcb4fa0b8a4b72fc98940877d1fdeff by Abseil Team <absl-team@google.com>:
Update SubmitMutexProfileData to accept wait_cycles instead of wait_timestamp
PiperOrigin-RevId: 415360871
--
9f979d307aa16ad09f214e04876cbe84395c0901 by Abseil Team <absl-team@google.com>:
absl::flat_hash_set compiles with -Wconversion -Wsign-compare
PiperOrigin-RevId: 415357498
--
9eceb14174708f15e61259d449b214a8a4c7f9e7 by Abseil Team <absl-team@google.com>:
Fix AddressIsReadable for the corner case of (aligned) addr == NULL.
PiperOrigin-RevId: 415307792
--
1a39ffe55898375e2d7f88c17c99db5a1b95b313 by Martijn Vels <mvels@google.com>:
Internal change
PiperOrigin-RevId: 415162872
--
64378549b110d5f5762185a5906c520fba70f0e7 by Abseil Team <absl-team@google.com>:
Fix a typo in the comments
PiperOrigin-RevId: 415088461
--
41aae8322e913b82710153c22b97c611fdb6e1fb by Abseil Team <absl-team@google.com>:
Switch from `connect` to `rt_sigreturn` -- the latter is much less problematic
for system call sandboxes.
PiperOrigin-RevId: 415073965
--
870c5e3388b6a35611bff538626fe7a1c8c87171 by Abseil Team <absl-team@google.com>:
Add ABSL_HAVE_HWADDRESS_SANITIZER and ABSL_HAVE_LEAK_SANITIZER
PiperOrigin-RevId: 414871189
--
f213ed60a66b58da7ac40555adfb1d529ff0a4db by Derek Mauro <dmauro@google.com>:
Remove reference to __SANITIZE_MEMORY__, which does not exist
It appears to have been copied by pattern matching from the ASAN/TSAN
code blocks.
https://github.com/gcc-mirror/gcc/blob/f47662204de27f7685699eeef89aa173ccf32d85/gcc/cppbuiltin.c#L79-L126
PiperOrigin-RevId: 414806587
--
b152891e73ab515f397ceb53f66c8ee2f33863ea by Abseil Team <absl-team@google.com>:
Rollback previous commit: SYS_open is not defined in certain environments.
PiperOrigin-RevId: 414521820
--
5a1cbb282331023902e1374dd0d920c4effbe47f by Abseil Team <absl-team@google.com>:
Use syscall(SYS_open, ...) instead of open() to avoid possible symbol
interposition.
Also add some warning notes.
PiperOrigin-RevId: 414508186
--
1824d6593612710aafdc599a89b0adced7d787f6 by Abseil Team <absl-team@google.com>:
Correct aarch64 macro check
The macro is __aarch64__, not __arch64__.
PiperOrigin-RevId: 414446225
--
a1536a57b64dfd53945d33a01cfc08b18c99c97b by Abseil Team <absl-team@google.com>:
Fix backwards comment in the last commit.
PiperOrigin-RevId: 414281214
--
11ac021ba779513667a31cf2563ddafc57d6d913 by Abseil Team <absl-team@google.com>:
AddressIsReadable() didn't work correctly on ARM when the given pointer was
misaligned at the end of the page.
Fix that by aligning the pointer on an 8-byte boundary before checking it.
PiperOrigin-RevId: 414203863
GitOrigin-RevId: 07240ca7822d007cdcc79f2c40bd58b2c2010348
Change-Id: If5f129194d59f5c9e5d84efd8cd9e17a70e072ab
3 years ago
|
|
|
// Determines whether the given character is printable. The space character
// (' ') counts as printable; no other whitespace character does.
|
|
|
|
inline bool ascii_isprint(unsigned char c) {
  // Accepts codes 32 (space) through 126 ('~'); everything below 32 and
  // everything from 127 (DEL) upward is rejected.
  return 32 <= c && c <= 126;
}
|
|
|
|
|
|
|
|
// ascii_isgraph()
|
|
|
|
//
|
|
|
|
// Determines whether the given character has a graphical representation.
|
|
|
|
inline bool ascii_isgraph(unsigned char c) {
  // Accepts codes 33 ('!') through 126 ('~'): the same upper bound as
  // ascii_isprint(), but with the space character (32) excluded.
  return 33 <= c && c <= 126;
}
|
|
|
|
|
|
|
|
// ascii_isupper()
|
|
|
|
//
|
|
|
|
// Determines whether the given character is uppercase.
|
|
|
|
inline bool ascii_isupper(unsigned char c) {
  // Upper-case letters are the contiguous range 'A' through 'Z'.
  if (c < 'A') return false;
  return c <= 'Z';
}
|
|
|
|
|
|
|
|
// ascii_islower()
|
|
|
|
//
|
|
|
|
// Determines whether the given character is lowercase.
|
|
|
|
inline bool ascii_islower(unsigned char c) {
  // Lower-case letters are the contiguous range 'a' through 'z'; reject
  // anything that falls on either side of it.
  return !(c < 'a' || c > 'z');
}
|
|
|
|
|
|
|
|
// ascii_isascii()
|
|
|
|
//
|
|
|
|
// Determines whether the given character is ASCII.
|
|
|
|
inline bool ascii_isascii(unsigned char c) {
  // Only values 0 through 127 are accepted.
  return c <= 127;
}
|
|
|
|
|
|
|
|
// ascii_tolower()
|
|
|
|
//
|
|
|
|
// Returns an ASCII character, converting to lowercase if uppercase is
|
|
|
|
// passed. Note that character values > 127 are simply returned.
|
|
|
|
inline char ascii_tolower(unsigned char c) {
|
|
|
|
return ascii_internal::kToLower[c];
|
|
|
|
}
|
|
|
|
|
|
|
|
// Converts the characters in `s` to lowercase, changing the contents of `s`.
|
|
|
|
// Declaration only: `s` points to the string that is modified, and no
// definition of this overload appears in this header.
void AsciiStrToLower(std::string* s);
|
|
|
|
|
|
|
|
// Creates a lowercase string from a given absl::string_view.
|
|
|
|
ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(absl::string_view s) {
  // Copy the viewed characters into an owned string, then hand that copy to
  // the in-place overload; the caller's data is left untouched.
  std::string lowered(s);
  absl::AsciiStrToLower(&lowered);
  return lowered;
}
|
|
|
|
|
|
|
|
// ascii_toupper()
|
|
|
|
//
|
|
|
|
// Returns the ASCII character, converting to upper-case if lower-case is
|
|
|
|
// passed. Note that character values > 127 are simply returned.
|
|
|
|
inline char ascii_toupper(unsigned char c) {
|
|
|
|
return ascii_internal::kToUpper[c];
|
|
|
|
}
|
|
|
|
|
|
|
|
// Converts the characters in `s` to uppercase, changing the contents of `s`.
|
|
|
|
// Declaration only: `s` points to the string that is modified, and no
// definition of this overload appears in this header.
void AsciiStrToUpper(std::string* s);
|
|
|
|
|
|
|
|
// Creates an uppercase string from a given absl::string_view.
|
|
|
|
ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(absl::string_view s) {
  // Copy the viewed characters into an owned string, then hand that copy to
  // the in-place overload; the caller's data is left untouched.
  std::string raised(s);
  absl::AsciiStrToUpper(&raised);
  return raised;
}
|
|
|
|
|
|
|
|
// Returns absl::string_view with whitespace stripped from the beginning of the
|
|
|
|
// given string_view.
|
|
|
|
ABSL_MUST_USE_RESULT inline absl::string_view StripLeadingAsciiWhitespace(
    absl::string_view str) {
  // Locate the first character that is not whitespace; if every character is
  // whitespace this is `str.end()` and the result is an empty view.
  const auto first_kept =
      std::find_if_not(str.begin(), str.end(), absl::ascii_isspace);
  // The iterator distance is never negative, so make the conversion to the
  // unsigned offset type explicit rather than relying on an implicit
  // signed-to-unsigned conversion.
  return str.substr(
      static_cast<absl::string_view::size_type>(first_kept - str.begin()));
}
|
|
|
|
|
|
|
|
// Strips in place whitespace from the beginning of the given string.
|
|
|
|
inline void StripLeadingAsciiWhitespace(std::string* str) {
|
|
|
|
auto it = std::find_if_not(str->begin(), str->end(), absl::ascii_isspace);
|
|
|
|
str->erase(str->begin(), it);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns absl::string_view with whitespace stripped from the end of the given
|
|
|
|
// string_view.
|
|
|
|
ABSL_MUST_USE_RESULT inline absl::string_view StripTrailingAsciiWhitespace(
    absl::string_view str) {
  // Scan backwards for the last character that is not whitespace.
  const auto last_kept =
      std::find_if_not(str.rbegin(), str.rend(), absl::ascii_isspace);
  // `rend() - last_kept` is the number of characters to keep. It is never
  // negative, so make the conversion to the unsigned length type explicit
  // rather than relying on an implicit signed-to-unsigned conversion.
  return str.substr(
      0, static_cast<absl::string_view::size_type>(str.rend() - last_kept));
}
|
|
|
|
|
|
|
|
// Strips in place whitespace from the end of the given string.
|
|
|
|
inline void StripTrailingAsciiWhitespace(std::string* str) {
|
|
|
|
auto it = std::find_if_not(str->rbegin(), str->rend(), absl::ascii_isspace);
|
|
|
|
str->erase(str->rend() - it);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns absl::string_view with whitespace stripped from both ends of the
|
|
|
|
// given string_view.
|
|
|
|
ABSL_MUST_USE_RESULT inline absl::string_view StripAsciiWhitespace(
    absl::string_view str) {
  // Trim the front first, then trim the back of what remains.
  const absl::string_view without_leading = StripLeadingAsciiWhitespace(str);
  return StripTrailingAsciiWhitespace(without_leading);
}
|
|
|
|
|
|
|
|
// Strips in place whitespace from both ends of the given string.
|
|
|
|
inline void StripAsciiWhitespace(std::string* str) {
  // The trailing whitespace is removed before the leading whitespace, so the
  // erase at the front operates on the already-shortened string.
  StripTrailingAsciiWhitespace(str);
  StripLeadingAsciiWhitespace(str);
}
|
|
|
|
|
|
|
|
// Removes leading, trailing, and consecutive internal whitespace.
|
|
|
|
// Declaration only: the pointer parameter is left unnamed here, and no
// definition of this function appears in this header.
void RemoveExtraAsciiWhitespace(std::string*);
|
|
|
|
|
|
|
|
ABSL_NAMESPACE_END
|
|
|
|
} // namespace absl
|
|
|
|
|
|
|
|
#endif // ABSL_STRINGS_ASCII_H_
|