Abseil Common Libraries (C++) (gRPC dependency)
https://abseil.io/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
511 lines
19 KiB
511 lines
19 KiB
// |
|
// Copyright 2017 The Abseil Authors. |
|
// |
|
// Licensed under the Apache License, Version 2.0 (the "License"); |
|
// you may not use this file except in compliance with the License. |
|
// You may obtain a copy of the License at |
|
// |
|
// http://www.apache.org/licenses/LICENSE-2.0 |
|
// |
|
// Unless required by applicable law or agreed to in writing, software |
|
// distributed under the License is distributed on an "AS IS" BASIS, |
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
// See the License for the specific language governing permissions and |
|
// limitations under the License. |
|
// |
|
// ----------------------------------------------------------------------------- |
|
// File: str_split.h |
|
// ----------------------------------------------------------------------------- |
|
// |
|
// This file contains functions for splitting strings. It defines the main |
|
// `StrSplit()` function, several delimiters for determining the boundaries on |
|
// which to split the std::string, and predicates for filtering delimited results. |
|
// `StrSplit()` adapts the returned collection to the type specified by the |
|
// caller. |
|
// |
|
// Example: |
|
// |
|
// // Splits the given std::string on commas. Returns the results in a |
|
// // vector of strings. |
|
// std::vector<std::string> v = absl::StrSplit("a,b,c", ','); |
|
// // Can also use "," |
|
// // v[0] == "a", v[1] == "b", v[2] == "c" |
|
// |
|
// See StrSplit() below for more information. |
|
#ifndef ABSL_STRINGS_STR_SPLIT_H_ |
|
#define ABSL_STRINGS_STR_SPLIT_H_ |
|
|
|
#include <algorithm> |
|
#include <cstddef> |
|
#include <map> |
|
#include <set> |
|
#include <string> |
|
#include <utility> |
|
#include <vector> |
|
|
|
#include "absl/base/internal/raw_logging.h" |
|
#include "absl/strings/internal/str_split_internal.h" |
|
#include "absl/strings/string_view.h" |
|
#include "absl/strings/strip.h" |
|
|
|
namespace absl { |
|
|
|
//------------------------------------------------------------------------------ |
|
// Delimiters |
|
//------------------------------------------------------------------------------ |
|
// |
|
// `StrSplit()` uses delimiters to define the boundaries between elements in the |
|
// provided input. Several `Delimiter` types are defined below. If a std::string |
|
// (`const char*`, `std::string`, or `absl::string_view`) is passed in place of |
|
// an explicit `Delimiter` object, `StrSplit()` treats it the same way as if it |
|
// were passed a `ByString` delimiter. |
|
// |
|
// A `Delimiter` is an object with a `Find()` function that knows how to find |
|
// the first occurrence of itself in a given `absl::string_view`. |
|
// |
|
// The following `Delimiter` types are available for use within `StrSplit()`: |
|
// |
|
// - `ByString` (default for std::string arguments) |
|
// - `ByChar` (default for a char argument) |
|
// - `ByAnyChar` |
|
// - `ByLength` |
|
// - `MaxSplits` |
|
// |
|
// |
|
// A Delimiter's Find() member function will be passed the input text that is to |
|
// be split and the position to begin searching for the next delimiter in the |
|
// input text. The returned absl::string_view should refer to the next |
|
// occurrence (after pos) of the represented delimiter; this returned |
|
// absl::string_view represents the next location where the input std::string should |
|
// be broken. The returned absl::string_view may be zero-length if the Delimiter |
|
// does not represent a part of the std::string (e.g., a fixed-length delimiter). If |
|
// no delimiter is found in the given text, a zero-length absl::string_view |
|
// referring to text.end() should be returned (e.g., |
|
// absl::string_view(text.end(), 0)). It is important that the returned |
|
// absl::string_view always be within the bounds of input text given as an |
|
// argument--it must not refer to a std::string that is physically located outside of |
|
// the given std::string. |
|
// |
|
// The following example is a simple Delimiter object that is created with a |
|
// single char and will look for that char in the text passed to the Find() |
|
// function: |
|
// |
|
// struct SimpleDelimiter { |
|
// const char c_; |
|
// explicit SimpleDelimiter(char c) : c_(c) {} |
|
// absl::string_view Find(absl::string_view text, size_t pos) { |
|
// auto found = text.find(c_, pos); |
|
// if (found == absl::string_view::npos) |
|
// return absl::string_view(text.end(), 0); |
|
// |
|
// return absl::string_view(text, found, 1); |
|
// } |
|
// }; |
|
|
|
// ByString |
|
// |
|
// A substring delimiter. If `StrSplit()` is passed a string in place of a
|
// `Delimiter` object, the std::string will be implicitly converted into a |
|
// `ByString` delimiter. |
|
// |
|
// Example: |
|
// |
|
// // Because a std::string literal is converted to an `absl::ByString`, |
|
// // the following two splits are equivalent. |
|
// |
|
// std::vector<std::string> v1 = absl::StrSplit("a, b, c", ", "); |
|
// |
|
// using absl::ByString; |
|
// std::vector<std::string> v2 = absl::StrSplit("a, b, c", |
|
// ByString(", ")); |
|
// // v[0] == "a", v[1] == "b", v[2] == "c" |
|
class ByString { |
|
public: |
|
explicit ByString(absl::string_view sp); |
|
absl::string_view Find(absl::string_view text, size_t pos) const; |
|
|
|
private: |
|
const std::string delimiter_; |
|
}; |
|
|
|
// ByChar |
|
// |
|
// A single character delimiter. `ByChar` is functionally equivalent to a |
|
// 1-char std::string within a `ByString` delimiter, but slightly more |
|
// efficient. |
|
// |
|
// Example: |
|
// |
|
// // Because a char literal is converted to an `absl::ByChar`,
|
// // the following two splits are equivalent. |
|
// std::vector<std::string> v1 = absl::StrSplit("a,b,c", ','); |
|
// using absl::ByChar; |
|
// std::vector<std::string> v2 = absl::StrSplit("a,b,c", ByChar(',')); |
|
// // v[0] == "a", v[1] == "b", v[2] == "c" |
|
// |
|
// `ByChar` is also the default delimiter if a single character is given |
|
// as the delimiter to `StrSplit()`. For example, the following calls are |
|
// equivalent: |
|
// |
|
// std::vector<std::string> v = absl::StrSplit("a-b", '-'); |
|
// |
|
// using absl::ByChar; |
|
// std::vector<std::string> v = absl::StrSplit("a-b", ByChar('-')); |
|
// |
|
class ByChar { |
|
public: |
|
explicit ByChar(char c) : c_(c) {} |
|
absl::string_view Find(absl::string_view text, size_t pos) const; |
|
|
|
private: |
|
char c_; |
|
}; |
|
|
|
// ByAnyChar |
|
// |
|
// A delimiter that will match any of the given byte-sized characters within |
|
// its provided std::string. |
|
// |
|
// Note: this delimiter works with single-byte std::string data, but does not work |
|
// with variable-width encodings, such as UTF-8. |
|
// |
|
// Example: |
|
// |
|
// using absl::ByAnyChar; |
|
// std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",=")); |
|
// // v[0] == "a", v[1] == "b", v[2] == "c" |
|
// |
|
// If `ByAnyChar` is given the empty std::string, it behaves exactly like |
|
// `ByString` and matches each individual character in the input std::string. |
|
// |
|
class ByAnyChar { |
|
public: |
|
explicit ByAnyChar(absl::string_view sp); |
|
absl::string_view Find(absl::string_view text, size_t pos) const; |
|
|
|
private: |
|
const std::string delimiters_; |
|
}; |
|
|
|
// ByLength |
|
// |
|
// A delimiter for splitting into equal-length strings. The length argument to |
|
// the constructor must be greater than 0. |
|
// |
|
// Note: this delimiter works with single-byte std::string data, but does not work |
|
// with variable-width encodings, such as UTF-8. |
|
// |
|
// Example: |
|
// |
|
// using absl::ByLength; |
|
// std::vector<std::string> v = absl::StrSplit("123456789", ByLength(3)); |
|
|
|
// // v[0] == "123", v[1] == "456", v[2] == "789" |
|
// |
|
// Note that the std::string does not have to be a multiple of the fixed split |
|
// length. In such a case, the last substring will be shorter. |
|
// |
|
// using absl::ByLength; |
|
// std::vector<std::string> v = absl::StrSplit("12345", ByLength(2)); |
|
// |
|
// // v[0] == "12", v[1] == "34", v[2] == "5"
|
class ByLength { |
|
public: |
|
explicit ByLength(ptrdiff_t length); |
|
absl::string_view Find(absl::string_view text, size_t pos) const; |
|
|
|
private: |
|
const ptrdiff_t length_; |
|
}; |
|
|
|
namespace strings_internal { |
|
|
|
// A traits-like metafunction for selecting the default Delimiter object type |
|
// for a particular Delimiter type. The base case simply exposes type Delimiter |
|
// itself as the delimiter's Type. However, there are specializations for |
|
// std::string-like objects that map them to the ByString delimiter object. |
|
// This allows functions like absl::StrSplit() and absl::MaxSplits() to accept |
|
// string-like objects (e.g., ",") as delimiter arguments but they will be
|
// treated as if a ByString delimiter was given. |
|
// Primary template: any type without a specialization is taken to be a
// Delimiter already and is exposed unchanged as `type`.
template <typename Delimiter>
struct SelectDelimiter {
  using type = Delimiter;
};
|
|
|
template <> |
|
struct SelectDelimiter<char> { |
|
using type = ByChar; |
|
}; |
|
template <> |
|
struct SelectDelimiter<char*> { |
|
using type = ByString; |
|
}; |
|
template <> |
|
struct SelectDelimiter<const char*> { |
|
using type = ByString; |
|
}; |
|
template <> |
|
struct SelectDelimiter<absl::string_view> { |
|
using type = ByString; |
|
}; |
|
template <> |
|
struct SelectDelimiter<std::string> { |
|
using type = ByString; |
|
}; |
|
|
|
// Wraps another delimiter and sets a max number of matches for that delimiter. |
|
template <typename Delimiter> |
|
class MaxSplitsImpl { |
|
public: |
|
MaxSplitsImpl(Delimiter delimiter, int limit) |
|
: delimiter_(delimiter), limit_(limit), count_(0) {} |
|
absl::string_view Find(absl::string_view text, size_t pos) { |
|
if (count_++ == limit_) { |
|
return absl::string_view(text.end(), 0); // No more matches. |
|
} |
|
return delimiter_.Find(text, pos); |
|
} |
|
|
|
private: |
|
Delimiter delimiter_; |
|
const int limit_; |
|
int count_; |
|
}; |
|
|
|
} // namespace strings_internal |
|
|
|
// MaxSplits() |
|
// |
|
// A delimiter that limits the number of matches which can occur to the passed |
|
// `limit`. The last element in the returned collection will contain all |
|
// remaining unsplit pieces, which may contain instances of the delimiter. |
|
// The collection will contain at most `limit` + 1 elements. |
|
// Example: |
|
// |
|
// using absl::MaxSplits; |
|
// std::vector<std::string> v = absl::StrSplit("a,b,c", MaxSplits(',', 1)); |
|
// |
|
// // v[0] == "a", v[1] == "b,c" |
|
template <typename Delimiter> |
|
inline strings_internal::MaxSplitsImpl< |
|
typename strings_internal::SelectDelimiter<Delimiter>::type> |
|
MaxSplits(Delimiter delimiter, int limit) { |
|
typedef |
|
typename strings_internal::SelectDelimiter<Delimiter>::type DelimiterType; |
|
return strings_internal::MaxSplitsImpl<DelimiterType>( |
|
DelimiterType(delimiter), limit); |
|
} |
|
|
|
//------------------------------------------------------------------------------ |
|
// Predicates |
|
//------------------------------------------------------------------------------ |
|
// |
|
// Predicates filter the results of a `StrSplit()` by determining whether or not |
|
// a resultant element is included in the result set. A predicate may be passed |
|
// as an optional third argument to the `StrSplit()` function. |
|
// |
|
// Predicates are unary functions (or functors) that take a single |
|
// `absl::string_view` argument and return a bool indicating whether the |
|
// argument should be included (`true`) or excluded (`false`). |
|
// |
|
// Predicates are useful when filtering out empty substrings. By default, empty |
|
// substrings may be returned by `StrSplit()`, which is similar to the way split |
|
// functions work in other programming languages. |
|
|
|
// AllowEmpty() |
|
// |
|
// Always returns `true`, indicating that all strings--including empty |
|
// strings--should be included in the split output. This predicate is not |
|
// strictly needed because this is the default behavior of `StrSplit()`; |
|
// however, it might be useful at some call sites to make the intent explicit. |
|
// |
|
// Example: |
|
// |
|
// std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', AllowEmpty()); |
|
// |
|
// // v[0] == " a ", v[1] == " ", v[2] == "", v[3] == "b", v[4] == ""
|
struct AllowEmpty { |
|
bool operator()(absl::string_view) const { return true; } |
|
}; |
|
|
|
// SkipEmpty() |
|
// |
|
// Returns `false` if the given `absl::string_view` is empty, indicating that |
|
// `StrSplit()` should omit the empty std::string. |
|
// |
|
// Example: |
|
// |
|
// std::vector<std::string> v = absl::StrSplit(",a,,b,", ',', SkipEmpty()); |
|
// |
|
// // v[0] == "a", v[1] == "b" |
|
// |
|
// Note: `SkipEmpty()` does not consider a std::string containing only whitespace |
|
// to be empty. To skip such whitespace as well, use the `SkipWhitespace()` |
|
// predicate. |
|
struct SkipEmpty { |
|
bool operator()(absl::string_view sp) const { return !sp.empty(); } |
|
}; |
|
|
|
// SkipWhitespace() |
|
// |
|
// Returns `false` if the given `absl::string_view` is empty *or* contains only |
|
// whitespace, indicating that `StrSplit()` should omit the std::string. |
|
// |
|
// Example: |
|
// |
|
// std::vector<std::string> v = absl::StrSplit(" a , ,,b,", |
|
// ',', SkipWhitespace()); |
|
// // v[0] == " a ", v[1] == "b" |
|
// |
|
// // SkipEmpty() would return whitespace elements |
|
// std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', SkipEmpty()); |
|
// // v[0] == " a ", v[1] == " ", v[2] == "b" |
|
struct SkipWhitespace { |
|
bool operator()(absl::string_view sp) const { |
|
sp = absl::StripAsciiWhitespace(sp); |
|
return !sp.empty(); |
|
} |
|
}; |
|
|
|
//------------------------------------------------------------------------------ |
|
// StrSplit() |
|
//------------------------------------------------------------------------------ |
|
|
|
// StrSplit() |
|
// |
|
// Splits a given `std::string` based on the provided `Delimiter` object, |
|
// returning the elements within the type specified by the caller. Optionally, |
|
// you may also pass a `Predicate` to `StrSplit()` indicating whether to include |
|
// or exclude the resulting element within the final result set. (See the |
|
// overviews for Delimiters and Predicates above.) |
|
// |
|
// Example: |
|
// |
|
// std::vector<std::string> v = absl::StrSplit("a,b,c,d", ','); |
|
// // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d" |
|
// |
|
// You can also provide an explicit `Delimiter` object: |
|
// |
|
// Example: |
|
// |
|
// using absl::ByAnyChar; |
|
// std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",=")); |
|
// // v[0] == "a", v[1] == "b", v[2] == "c" |
|
// |
|
// See above for more information on delimiters. |
|
// |
|
// By default, empty strings are included in the result set. You can optionally |
|
// include a third `Predicate` argument to apply a test for whether the |
|
// resultant element should be included in the result set: |
|
// |
|
// Example: |
|
// |
|
// std::vector<std::string> v = absl::StrSplit(" a , ,,b,", |
|
// ',', SkipWhitespace()); |
|
// // v[0] == " a ", v[1] == "b"
|
// |
|
// See above for more information on predicates. |
|
// |
|
//------------------------------------------------------------------------------ |
|
// StrSplit() Return Types |
|
//------------------------------------------------------------------------------ |
|
// |
|
// The `StrSplit()` function adapts the returned collection to the collection |
|
// specified by the caller (e.g. `std::vector` above). The returned collections |
|
// may contain `string`, `absl::string_view` (in which case the original std::string |
|
// being split must ensure that it outlives the collection), or any object that |
|
// can be explicitly created from an `absl::string_view`. This behavior works |
|
// for: |
|
// |
|
// 1) All standard STL containers including `std::vector`, `std::list`, |
|
// `std::deque`, `std::set`, `std::multiset`, `std::map`, and `std::multimap`
|
// 2) `std::pair` (which is not actually a container). See below. |
|
// |
|
// Example: |
|
// |
|
// // The results are returned as `absl::string_view` objects. Note that we |
|
// // have to ensure that the input std::string outlives any results. |
|
// std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ','); |
|
// |
|
// // Stores results in a std::set<std::string>, which also performs |
|
// // de-duplication and orders the elements in ascending order. |
|
// std::set<std::string> a = absl::StrSplit("b,a,c,a,b", ','); |
|
// // a contains "a", "b", "c" (in ascending order)
|
// |
|
// // `StrSplit()` can be used within a range-based for loop, in which case |
|
// // each element will be of type `absl::string_view`. |
|
// std::vector<std::string> v; |
|
// for (const auto sv : absl::StrSplit("a,b,c", ',')) { |
|
// if (sv != "b") v.emplace_back(sv); |
|
// } |
|
// // v[0] == "a", v[1] == "c" |
|
// |
|
// // Stores results in a map. The map implementation assumes that the input |
|
// // is provided as a series of key/value pairs. For example, the 0th element |
|
// // resulting from the split will be stored as a key to the 1st element. If |
|
// // an odd number of elements are resolved, the last element is paired with |
|
// // a default-constructed value (e.g., empty std::string). |
|
// std::map<std::string, std::string> m = absl::StrSplit("a,b,c", ','); |
|
// // m["a"] == "b", m["c"] == "" // last component value equals "" |
|
// |
|
// Splitting to `std::pair` is an interesting case because it can hold only two |
|
// elements and is not a collection type. When splitting to a `std::pair` the |
|
// first two split strings become the `std::pair` `.first` and `.second` |
|
// members, respectively. The remaining split substrings are discarded. If there
// are fewer than two split substrings, the empty string is used for the
// corresponding `std::pair` member.
|
// |
|
// Example: |
|
// |
|
// // Stores first two split strings as the members in a std::pair. |
|
// std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ','); |
|
// // p.first == "a", p.second == "b" // "c" is omitted. |
|
// |
|
// The `StrSplit()` function can be used multiple times to perform more |
|
// complicated splitting logic, such as intelligently parsing key-value pairs. |
|
// |
|
// Example: |
|
// |
|
// // The input std::string "a=b=c,d=e,f=,g" becomes |
|
// // { "a" => "b=c", "d" => "e", "f" => "", "g" => "" } |
|
// std::map<std::string, std::string> m; |
|
// for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) { |
|
// m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1))); |
|
// } |
|
// EXPECT_EQ("b=c", m.find("a")->second); |
|
// EXPECT_EQ("e", m.find("d")->second); |
|
// EXPECT_EQ("", m.find("f")->second); |
|
// EXPECT_EQ("", m.find("g")->second); |
|
// |
|
// WARNING: Due to a legacy bug that is maintained for backward compatibility, |
|
// splitting the following empty string_views produces different results: |
|
// |
|
// absl::StrSplit(absl::string_view(""), '-'); // {""} |
|
// absl::StrSplit(absl::string_view(), '-'); // {}, but should be {""} |
|
// |
|
// Try not to depend on this distinction because the bug may one day be fixed. |
|
template <typename Delimiter> |
|
strings_internal::Splitter< |
|
typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty> |
|
StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) { |
|
using DelimiterType = |
|
typename strings_internal::SelectDelimiter<Delimiter>::type; |
|
return strings_internal::Splitter<DelimiterType, AllowEmpty>( |
|
std::move(text), DelimiterType(d), AllowEmpty()); |
|
} |
|
|
|
template <typename Delimiter, typename Predicate> |
|
strings_internal::Splitter< |
|
typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate> |
|
StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d, |
|
Predicate p) { |
|
using DelimiterType = |
|
typename strings_internal::SelectDelimiter<Delimiter>::type; |
|
return strings_internal::Splitter<DelimiterType, Predicate>( |
|
std::move(text), DelimiterType(d), std::move(p)); |
|
} |
|
|
|
} // namespace absl |
|
|
|
#endif // ABSL_STRINGS_STR_SPLIT_H_
|
|
|