|
|
|
// Copyright 2020 The Abseil Authors.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// https://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
|
|
|
|
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
|
|
|
|
|
|
|
|
#include <limits.h>
|
|
|
|
#include <stddef.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
|
|
|
#include <cassert>
|
|
|
|
#include <cstdint>
|
|
|
|
#include <initializer_list>
|
|
|
|
#include <iosfwd>
|
|
|
|
#include <iterator>
|
|
|
|
#include <memory>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
#include "absl/strings/internal/str_format/checker.h"
|
|
|
|
#include "absl/strings/internal/str_format/extension.h"
|
|
|
|
|
|
|
|
namespace absl {
|
|
|
|
ABSL_NAMESPACE_BEGIN
|
|
|
|
namespace str_format_internal {
|
|
|
|
|
|
|
|
// Length-modifier part of a conversion specification.
// The enumerators take consecutive values starting at 0 (h == 0, ...,
// none == 9) and fit in a single byte; ConvTag stores these values in a
// signed 8-bit tag. `none` means that no length modifier is present.
enum class LengthMod : std::uint8_t {
  h,
  hh,
  l,
  ll,
  L,
  j,
  z,
  t,
  q,
  none
};
|
|
|
|
|
|
|
|
// Returns a std::string for the length modifier `v`. Only the declaration
// lives in this header; the definition is elsewhere.
// NOTE(review): presumably the result is the textual spelling of the modifier
// as written in a format string -- confirm against the definition.
std::string LengthModToString(LengthMod v);
|
|
|
|
|
|
|
|
// The analyzed properties of a single specified conversion.
|
|
|
|
struct UnboundConversion {
|
|
|
|
UnboundConversion()
|
|
|
|
: flags() /* This is required to zero all the fields of flags. */ {
|
|
|
|
flags.basic = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
class InputValue {
|
|
|
|
public:
|
|
|
|
void set_value(int value) {
|
|
|
|
assert(value >= 0);
|
|
|
|
value_ = value;
|
|
|
|
}
|
|
|
|
int value() const { return value_; }
|
|
|
|
|
|
|
|
// Marks the value as "from arg". aka the '*' format.
|
|
|
|
// Requires `value >= 1`.
|
|
|
|
// When set, is_from_arg() return true and get_from_arg() returns the
|
|
|
|
// original value.
|
|
|
|
// `value()`'s return value is unspecfied in this state.
|
|
|
|
void set_from_arg(int value) {
|
|
|
|
assert(value > 0);
|
|
|
|
value_ = -value - 1;
|
|
|
|
}
|
|
|
|
bool is_from_arg() const { return value_ < -1; }
|
|
|
|
int get_from_arg() const {
|
|
|
|
assert(is_from_arg());
|
|
|
|
return -value_ - 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
int value_ = -1;
|
|
|
|
};
|
|
|
|
|
|
|
|
// No need to initialize. It will always be set in the parser.
|
|
|
|
int arg_position;
|
|
|
|
|
|
|
|
InputValue width;
|
|
|
|
InputValue precision;
|
|
|
|
|
|
|
|
Flags flags;
|
|
|
|
LengthMod length_mod = LengthMod::none;
|
|
|
|
FormatConversionChar conv = FormatConversionCharInternal::kNone;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Consume conversion spec prefix (not including '%') of [p, end) if valid.
|
|
|
|
// Examples of valid specs would be e.g.: "s", "d", "-12.6f".
|
|
|
|
// If valid, it returns the first character following the conversion spec,
|
|
|
|
// and the spec part is broken down and returned in 'conv'.
|
|
|
|
// If invalid, returns nullptr.
|
|
|
|
const char* ConsumeUnboundConversion(const char* p, const char* end,
|
|
|
|
UnboundConversion* conv, int* next_arg);
|
|
|
|
|
|
|
|
// Helper tag class for the table below.
|
|
|
|
// It allows fast `char -> ConversionChar/LengthMod` checking and
|
|
|
|
// conversions.
|
|
|
|
class ConvTag {
|
|
|
|
public:
|
|
|
|
constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT
|
|
|
|
: tag_(static_cast<int8_t>(conversion_char)) {}
|
|
|
|
// We invert the length modifiers to make them negative so that we can easily
|
|
|
|
// test for them.
|
|
|
|
constexpr ConvTag(LengthMod length_mod) // NOLINT
|
|
|
|
: tag_(~static_cast<std::int8_t>(length_mod)) {}
|
|
|
|
// Everything else is -128, which is negative to make is_conv() simpler.
|
|
|
|
constexpr ConvTag() : tag_(-128) {}
|
|
|
|
|
|
|
|
bool is_conv() const { return tag_ >= 0; }
|
|
|
|
bool is_length() const { return tag_ < 0 && tag_ != -128; }
|
|
|
|
FormatConversionChar as_conv() const {
|
|
|
|
assert(is_conv());
|
|
|
|
return static_cast<FormatConversionChar>(tag_);
|
|
|
|
}
|
|
|
|
LengthMod as_length() const {
|
|
|
|
assert(is_length());
|
|
|
|
return static_cast<LengthMod>(~tag_);
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
std::int8_t tag_;
|
|
|
|
};
|
|
|
|
|
|
|
|
// One ConvTag per possible `unsigned char` value. A single table is kept for
// all the conversion characters and length modifiers. Only declared here.
extern const ConvTag kTags[256];

// Looks up the tag for `c`. The character is converted to `unsigned char`
// first so that the table index is always within [0, 255], whatever the
// signedness of plain `char`.
inline ConvTag GetTagForChar(char c) {
  const unsigned char index = static_cast<unsigned char>(c);
  return kTags[index];
}
|
|
|
|
|
|
|
|
// Parse the format string provided in 'src' and pass the identified items into
|
|
|
|
// 'consumer'.
|
|
|
|
// Text runs will be passed by calling
|
|
|
|
// Consumer::Append(string_view);
|
|
|
|
// ConversionItems will be passed by calling
|
|
|
|
// Consumer::ConvertOne(UnboundConversion, string_view);
|
|
|
|
// In the case of ConvertOne, the string_view that is passed is the
|
|
|
|
// portion of the format string corresponding to the conversion, not including
|
|
|
|
// the leading %. On success, it returns true. On failure, it stops and returns
|
|
|
|
// false.
|
|
|
|
template <typename Consumer>
|
|
|
|
bool ParseFormatString(string_view src, Consumer consumer) {
|
|
|
|
int next_arg = 0;
|
|
|
|
const char* p = src.data();
|
|
|
|
const char* const end = p + src.size();
|
|
|
|
while (p != end) {
|
|
|
|
const char* percent = static_cast<const char*>(memchr(p, '%', end - p));
|
|
|
|
if (!percent) {
|
|
|
|
// We found the last substring.
|
|
|
|
return consumer.Append(string_view(p, end - p));
|
|
|
|
}
|
|
|
|
// We found a percent, so push the text run then process the percent.
|
|
|
|
if (ABSL_PREDICT_FALSE(!consumer.Append(string_view(p, percent - p)))) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false;
|
|
|
|
|
|
|
|
auto tag = GetTagForChar(percent[1]);
|
|
|
|
if (tag.is_conv()) {
|
|
|
|
if (ABSL_PREDICT_FALSE(next_arg < 0)) {
|
|
|
|
// This indicates an error in the format string.
|
|
|
|
// The only way to get `next_arg < 0` here is to have a positional
|
|
|
|
// argument first which sets next_arg to -1 and then a non-positional
|
|
|
|
// argument.
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
p = percent + 2;
|
|
|
|
|
|
|
|
// Keep this case separate from the one below.
|
|
|
|
// ConvertOne is more efficient when the compiler can see that the `basic`
|
|
|
|
// flag is set.
|
|
|
|
UnboundConversion conv;
|
|
|
|
conv.conv = tag.as_conv();
|
|
|
|
conv.arg_position = ++next_arg;
|
|
|
|
if (ABSL_PREDICT_FALSE(
|
|
|
|
!consumer.ConvertOne(conv, string_view(percent + 1, 1)))) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
} else if (percent[1] != '%') {
|
|
|
|
UnboundConversion conv;
|
|
|
|
p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg);
|
|
|
|
if (ABSL_PREDICT_FALSE(p == nullptr)) return false;
|
|
|
|
if (ABSL_PREDICT_FALSE(!consumer.ConvertOne(
|
|
|
|
conv, string_view(percent + 1, p - (percent + 1))))) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false;
|
|
|
|
p = percent + 2;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Yields true for every input. Its purpose is the side condition: when it is
// evaluated in a constexpr context, reading `s[0]` fails to compile unless `s`
// points to a constexpr char array. The self-comparison is intentional.
constexpr bool EnsureConstexpr(string_view s) {
  return s.empty() ? true : (s[0] == s[0]);
}
|
|
|
|
|
|
|
|
class ParsedFormatBase {
|
|
|
|
public:
|
|
|
|
explicit ParsedFormatBase(
|
|
|
|
string_view format, bool allow_ignored,
|
|
|
|
std::initializer_list<FormatConversionCharSet> convs);
|
|
|
|
|
|
|
|
ParsedFormatBase(const ParsedFormatBase& other) { *this = other; }
|
|
|
|
|
|
|
|
ParsedFormatBase(ParsedFormatBase&& other) { *this = std::move(other); }
|
|
|
|
|
|
|
|
ParsedFormatBase& operator=(const ParsedFormatBase& other) {
|
|
|
|
if (this == &other) return *this;
|
|
|
|
has_error_ = other.has_error_;
|
|
|
|
items_ = other.items_;
|
|
|
|
size_t text_size = items_.empty() ? 0 : items_.back().text_end;
|
|
|
|
data_.reset(new char[text_size]);
|
|
|
|
memcpy(data_.get(), other.data_.get(), text_size);
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
ParsedFormatBase& operator=(ParsedFormatBase&& other) {
|
|
|
|
if (this == &other) return *this;
|
|
|
|
has_error_ = other.has_error_;
|
|
|
|
data_ = std::move(other.data_);
|
|
|
|
items_ = std::move(other.items_);
|
|
|
|
// Reset the vector to make sure the invariants hold.
|
|
|
|
other.items_.clear();
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename Consumer>
|
|
|
|
bool ProcessFormat(Consumer consumer) const {
|
|
|
|
const char* const base = data_.get();
|
|
|
|
string_view text(base, 0);
|
|
|
|
for (const auto& item : items_) {
|
|
|
|
const char* const end = text.data() + text.size();
|
|
|
|
text = string_view(end, (base + item.text_end) - end);
|
|
|
|
if (item.is_conversion) {
|
|
|
|
if (!consumer.ConvertOne(item.conv, text)) return false;
|
|
|
|
} else {
|
|
|
|
if (!consumer.Append(text)) return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return !has_error_;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool has_error() const { return has_error_; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
// Returns whether the conversions match and if !allow_ignored it verifies
|
|
|
|
// that all conversions are used by the format.
|
|
|
|
bool MatchesConversions(
|
|
|
|
bool allow_ignored,
|
|
|
|
std::initializer_list<FormatConversionCharSet> convs) const;
|
|
|
|
|
|
|
|
struct ParsedFormatConsumer;
|
|
|
|
|
|
|
|
struct ConversionItem {
|
|
|
|
bool is_conversion;
|
|
|
|
// Points to the past-the-end location of this element in the data_ array.
|
|
|
|
size_t text_end;
|
|
|
|
UnboundConversion conv;
|
|
|
|
};
|
|
|
|
|
|
|
|
bool has_error_;
|
|
|
|
std::unique_ptr<char[]> data_;
|
|
|
|
std::vector<ConversionItem> items_;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// A value type representing a preparsed format. These can be created, copied
|
|
|
|
// around, and reused to speed up formatting loops.
|
|
|
|
// The user must specify through the template arguments the conversion
|
|
|
|
// characters used in the format. This will be checked at compile time.
|
|
|
|
//
|
|
|
|
// This class uses Conv enum values to specify each argument.
|
|
|
|
// This allows for more flexibility as you can specify multiple possible
|
|
|
|
// conversion characters for each argument.
|
|
|
|
// ParsedFormat<char...> is a simplified alias for when the user only
|
|
|
|
// needs to specify a single conversion character for each argument.
|
|
|
|
//
|
|
|
|
// Example:
|
|
|
|
// // Extended format supports multiple characters per argument:
|
|
|
|
// using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>;
|
|
|
|
// MyFormat GetFormat(bool use_hex) {
|
|
|
|
// if (use_hex) return MyFormat("foo %x bar");
|
|
|
|
// return MyFormat("foo %d bar");
|
|
|
|
// }
|
|
|
|
// // 'format' can be used with any value that supports 'd' and 'x',
|
|
|
|
// // like `int`.
|
|
|
|
// auto format = GetFormat(use_hex);
|
|
|
|
// value = StringF(format, i);
|
|
|
|
//
|
|
|
|
// This class also supports runtime format checking with the ::New() and
|
|
|
|
// ::NewAllowIgnored() factory functions.
|
|
|
|
// This is the only API that allows the user to pass a runtime specified format
|
|
|
|
// string. These factory functions will return NULL if the format does not match
|
|
|
|
// the conversions requested by the user.
|
|
|
|
template <FormatConversionCharSet... C>
|
|
|
|
class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase {
|
|
|
|
public:
|
|
|
|
explicit ExtendedParsedFormat(string_view format)
|
|
|
|
#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
|
|
|
|
__attribute__((
|
|
|
|
enable_if(str_format_internal::EnsureConstexpr(format),
|
|
|
|
"Format string is not constexpr."),
|
|
|
|
enable_if(str_format_internal::ValidFormatImpl<C...>(format),
|
|
|
|
"Format specified does not match the template arguments.")))
|
|
|
|
#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
|
|
|
|
: ExtendedParsedFormat(format, false) {
|
|
|
|
}
|
|
|
|
|
|
|
|
// ExtendedParsedFormat factory function.
|
|
|
|
// The user still has to specify the conversion characters, but they will not
|
|
|
|
// be checked at compile time. Instead, it will be checked at runtime.
|
|
|
|
// This delays the checking to runtime, but allows the user to pass
|
|
|
|
// dynamically sourced formats.
|
|
|
|
// It returns NULL if the format does not match the conversion characters.
|
|
|
|
// The user is responsible for checking the return value before using it.
|
|
|
|
//
|
|
|
|
// The 'New' variant will check that all the specified arguments are being
|
|
|
|
// consumed by the format and return NULL if any argument is being ignored.
|
|
|
|
// The 'NewAllowIgnored' variant will not verify this and will allow formats
|
|
|
|
// that ignore arguments.
|
|
|
|
static std::unique_ptr<ExtendedParsedFormat> New(string_view format) {
|
|
|
|
return New(format, false);
|
|
|
|
}
|
|
|
|
static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored(
|
|
|
|
string_view format) {
|
|
|
|
return New(format, true);
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
static std::unique_ptr<ExtendedParsedFormat> New(string_view format,
|
|
|
|
bool allow_ignored) {
|
|
|
|
std::unique_ptr<ExtendedParsedFormat> conv(
|
|
|
|
new ExtendedParsedFormat(format, allow_ignored));
|
|
|
|
if (conv->has_error()) return nullptr;
|
|
|
|
return conv;
|
|
|
|
}
|
|
|
|
|
|
|
|
ExtendedParsedFormat(string_view s, bool allow_ignored)
|
|
|
|
: ParsedFormatBase(s, allow_ignored, {C...}) {}
|
|
|
|
};
|
|
|
|
} // namespace str_format_internal
|
|
|
|
ABSL_NAMESPACE_END
|
|
|
|
} // namespace absl
|
|
|
|
|
|
|
|
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
|