You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
423 lines
14 KiB
423 lines
14 KiB
// Copyright 2018 The Abseil Authors. |
|
// |
|
// Licensed under the Apache License, Version 2.0 (the "License"); |
|
// you may not use this file except in compliance with the License. |
|
// You may obtain a copy of the License at |
|
// |
|
// https://www.apache.org/licenses/LICENSE-2.0 |
|
// |
|
// Unless required by applicable law or agreed to in writing, software |
|
// distributed under the License is distributed on an "AS IS" BASIS, |
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
// See the License for the specific language governing permissions and |
|
// limitations under the License. |
|
|
|
#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ |
|
#define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ |
|
|
|
#include <algorithm> |
|
#include <cstdint> |
|
#include <iostream> |
|
#include <string> |
|
|
|
#include "absl/base/config.h" |
|
#include "absl/strings/ascii.h" |
|
#include "absl/strings/internal/charconv_parse.h" |
|
#include "absl/strings/string_view.h" |
|
|
|
namespace absl { |
|
ABSL_NAMESPACE_BEGIN |
|
namespace strings_internal { |
|
|
|
// The largest power that 5 that can be raised to, and still fit in a uint32_t. |
|
constexpr int kMaxSmallPowerOfFive = 13; |
|
// The largest power that 10 that can be raised to, and still fit in a uint32_t. |
|
constexpr int kMaxSmallPowerOfTen = 9; |
|
|
|
ABSL_DLL extern const uint32_t |
|
kFiveToNth[kMaxSmallPowerOfFive + 1]; |
|
ABSL_DLL extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1]; |
|
|
|
// Large, fixed-width unsigned integer. |
|
// |
|
// Exact rounding for decimal-to-binary floating point conversion requires very |
|
// large integer math, but a design goal of absl::from_chars is to avoid |
|
// allocating memory. The integer precision needed for decimal-to-binary |
|
// conversions is large but bounded, so a huge fixed-width integer class |
|
// suffices. |
|
// |
|
// This is an intentionally limited big integer class. Only needed operations |
|
// are implemented. All storage lives in an array data member, and all |
|
// arithmetic is done in-place, to avoid requiring separate storage for operand |
|
// and result. |
|
// |
|
// This is an internal class. Some methods live in the .cc file, and are |
|
// instantiated only for the values of max_words we need. |
|
template <int max_words> |
|
class BigUnsigned { |
|
public: |
|
static_assert(max_words == 4 || max_words == 84, |
|
"unsupported max_words value"); |
|
|
|
BigUnsigned() : size_(0), words_{} {} |
|
explicit constexpr BigUnsigned(uint64_t v) |
|
: size_((v >> 32) ? 2 : v ? 1 : 0), |
|
words_{static_cast<uint32_t>(v & 0xffffffffu), |
|
static_cast<uint32_t>(v >> 32)} {} |
|
|
|
// Constructs a BigUnsigned from the given string_view containing a decimal |
|
// value. If the input string is not a decimal integer, constructs a 0 |
|
// instead. |
|
explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} { |
|
// Check for valid input, returning a 0 otherwise. This is reasonable |
|
// behavior only because this constructor is for unit tests. |
|
if (std::find_if_not(sv.begin(), sv.end(), ascii_isdigit) != sv.end() || |
|
sv.empty()) { |
|
return; |
|
} |
|
int exponent_adjust = |
|
ReadDigits(sv.data(), sv.data() + sv.size(), Digits10() + 1); |
|
if (exponent_adjust > 0) { |
|
MultiplyByTenToTheNth(exponent_adjust); |
|
} |
|
} |
|
|
|
// Loads the mantissa value of a previously-parsed float. |
|
// |
|
// Returns the associated decimal exponent. The value of the parsed float is |
|
// exactly *this * 10**exponent. |
|
int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits); |
|
|
|
// Returns the number of decimal digits of precision this type provides. All |
|
// numbers with this many decimal digits or fewer are representable by this |
|
// type. |
|
// |
|
// Analagous to std::numeric_limits<BigUnsigned>::digits10. |
|
static constexpr int Digits10() { |
|
// 9975007/1035508 is very slightly less than log10(2**32). |
|
return static_cast<uint64_t>(max_words) * 9975007 / 1035508; |
|
} |
|
|
|
// Shifts left by the given number of bits. |
|
void ShiftLeft(int count) { |
|
if (count > 0) { |
|
const int word_shift = count / 32; |
|
if (word_shift >= max_words) { |
|
SetToZero(); |
|
return; |
|
} |
|
size_ = (std::min)(size_ + word_shift, max_words); |
|
count %= 32; |
|
if (count == 0) { |
|
std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_); |
|
} else { |
|
for (int i = (std::min)(size_, max_words - 1); i > word_shift; --i) { |
|
words_[i] = (words_[i - word_shift] << count) | |
|
(words_[i - word_shift - 1] >> (32 - count)); |
|
} |
|
words_[word_shift] = words_[0] << count; |
|
// Grow size_ if necessary. |
|
if (size_ < max_words && words_[size_]) { |
|
++size_; |
|
} |
|
} |
|
std::fill(words_, words_ + word_shift, 0u); |
|
} |
|
} |
|
|
|
|
|
// Multiplies by v in-place. |
|
void MultiplyBy(uint32_t v) { |
|
if (size_ == 0 || v == 1) { |
|
return; |
|
} |
|
if (v == 0) { |
|
SetToZero(); |
|
return; |
|
} |
|
const uint64_t factor = v; |
|
uint64_t window = 0; |
|
for (int i = 0; i < size_; ++i) { |
|
window += factor * words_[i]; |
|
words_[i] = window & 0xffffffff; |
|
window >>= 32; |
|
} |
|
// If carry bits remain and there's space for them, grow size_. |
|
if (window && size_ < max_words) { |
|
words_[size_] = window & 0xffffffff; |
|
++size_; |
|
} |
|
} |
|
|
|
void MultiplyBy(uint64_t v) { |
|
uint32_t words[2]; |
|
words[0] = static_cast<uint32_t>(v); |
|
words[1] = static_cast<uint32_t>(v >> 32); |
|
if (words[1] == 0) { |
|
MultiplyBy(words[0]); |
|
} else { |
|
MultiplyBy(2, words); |
|
} |
|
} |
|
|
|
// Multiplies in place by 5 to the power of n. n must be non-negative. |
|
void MultiplyByFiveToTheNth(int n) { |
|
while (n >= kMaxSmallPowerOfFive) { |
|
MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]); |
|
n -= kMaxSmallPowerOfFive; |
|
} |
|
if (n > 0) { |
|
MultiplyBy(kFiveToNth[n]); |
|
} |
|
} |
|
|
|
// Multiplies in place by 10 to the power of n. n must be non-negative. |
|
void MultiplyByTenToTheNth(int n) { |
|
if (n > kMaxSmallPowerOfTen) { |
|
// For large n, raise to a power of 5, then shift left by the same amount. |
|
// (10**n == 5**n * 2**n.) This requires fewer multiplications overall. |
|
MultiplyByFiveToTheNth(n); |
|
ShiftLeft(n); |
|
} else if (n > 0) { |
|
// We can do this more quickly for very small N by using a single |
|
// multiplication. |
|
MultiplyBy(kTenToNth[n]); |
|
} |
|
} |
|
|
|
// Returns the value of 5**n, for non-negative n. This implementation uses |
|
// a lookup table, and is faster then seeding a BigUnsigned with 1 and calling |
|
// MultiplyByFiveToTheNth(). |
|
static BigUnsigned FiveToTheNth(int n); |
|
|
|
// Multiplies by another BigUnsigned, in-place. |
|
template <int M> |
|
void MultiplyBy(const BigUnsigned<M>& other) { |
|
MultiplyBy(other.size(), other.words()); |
|
} |
|
|
|
void SetToZero() { |
|
std::fill(words_, words_ + size_, 0u); |
|
size_ = 0; |
|
} |
|
|
|
// Returns the value of the nth word of this BigUnsigned. This is |
|
// range-checked, and returns 0 on out-of-bounds accesses. |
|
uint32_t GetWord(int index) const { |
|
if (index < 0 || index >= size_) { |
|
return 0; |
|
} |
|
return words_[index]; |
|
} |
|
|
|
// Returns this integer as a decimal string. This is not used in the decimal- |
|
// to-binary conversion; it is intended to aid in testing. |
|
std::string ToString() const; |
|
|
|
int size() const { return size_; } |
|
const uint32_t* words() const { return words_; } |
|
|
|
private: |
|
// Reads the number between [begin, end), possibly containing a decimal point, |
|
// into this BigUnsigned. |
|
// |
|
// Callers are required to ensure [begin, end) contains a valid number, with |
|
// one or more decimal digits and at most one decimal point. This routine |
|
// will behave unpredictably if these preconditions are not met. |
|
// |
|
// Only the first `significant_digits` digits are read. Digits beyond this |
|
// limit are "sticky": If the final significant digit is 0 or 5, and if any |
|
// dropped digit is nonzero, then that final significant digit is adjusted up |
|
// to 1 or 6. This adjustment allows for precise rounding. |
|
// |
|
// Returns `exponent_adjustment`, a power-of-ten exponent adjustment to |
|
// account for the decimal point and for dropped significant digits. After |
|
// this function returns, |
|
// actual_value_of_parsed_string ~= *this * 10**exponent_adjustment. |
|
int ReadDigits(const char* begin, const char* end, int significant_digits); |
|
|
|
// Performs a step of big integer multiplication. This computes the full |
|
// (64-bit-wide) values that should be added at the given index (step), and |
|
// adds to that location in-place. |
|
// |
|
// Because our math all occurs in place, we must multiply starting from the |
|
// highest word working downward. (This is a bit more expensive due to the |
|
// extra carries involved.) |
|
// |
|
// This must be called in steps, for each word to be calculated, starting from |
|
// the high end and working down to 0. The first value of `step` should be |
|
// `std::min(original_size + other.size_ - 2, max_words - 1)`. |
|
// The reason for this expression is that multiplying the i'th word from one |
|
// multiplicand and the j'th word of another multiplicand creates a |
|
// two-word-wide value to be stored at the (i+j)'th element. The highest |
|
// word indices we will access are `original_size - 1` from this object, and |
|
// `other.size_ - 1` from our operand. Therefore, |
|
// `original_size + other.size_ - 2` is the first step we should calculate, |
|
// but limited on an upper bound by max_words. |
|
|
|
// Working from high-to-low ensures that we do not overwrite the portions of |
|
// the initial value of *this which are still needed for later steps. |
|
// |
|
// Once called with step == 0, *this contains the result of the |
|
// multiplication. |
|
// |
|
// `original_size` is the size_ of *this before the first call to |
|
// MultiplyStep(). `other_words` and `other_size` are the contents of our |
|
// operand. `step` is the step to perform, as described above. |
|
void MultiplyStep(int original_size, const uint32_t* other_words, |
|
int other_size, int step); |
|
|
|
void MultiplyBy(int other_size, const uint32_t* other_words) { |
|
const int original_size = size_; |
|
const int first_step = |
|
(std::min)(original_size + other_size - 2, max_words - 1); |
|
for (int step = first_step; step >= 0; --step) { |
|
MultiplyStep(original_size, other_words, other_size, step); |
|
} |
|
} |
|
|
|
// Adds a 32-bit value to the index'th word, with carry. |
|
void AddWithCarry(int index, uint32_t value) { |
|
if (value) { |
|
while (index < max_words && value > 0) { |
|
words_[index] += value; |
|
// carry if we overflowed in this word: |
|
if (value > words_[index]) { |
|
value = 1; |
|
++index; |
|
} else { |
|
value = 0; |
|
} |
|
} |
|
size_ = (std::min)(max_words, (std::max)(index + 1, size_)); |
|
} |
|
} |
|
|
|
void AddWithCarry(int index, uint64_t value) { |
|
if (value && index < max_words) { |
|
uint32_t high = value >> 32; |
|
uint32_t low = value & 0xffffffff; |
|
words_[index] += low; |
|
if (words_[index] < low) { |
|
++high; |
|
if (high == 0) { |
|
// Carry from the low word caused our high word to overflow. |
|
// Short circuit here to do the right thing. |
|
AddWithCarry(index + 2, static_cast<uint32_t>(1)); |
|
return; |
|
} |
|
} |
|
if (high > 0) { |
|
AddWithCarry(index + 1, high); |
|
} else { |
|
// Normally 32-bit AddWithCarry() sets size_, but since we don't call |
|
// it when `high` is 0, do it ourselves here. |
|
size_ = (std::min)(max_words, (std::max)(index + 1, size_)); |
|
} |
|
} |
|
} |
|
|
|
// Divide this in place by a constant divisor. Returns the remainder of the |
|
// division. |
|
template <uint32_t divisor> |
|
uint32_t DivMod() { |
|
uint64_t accumulator = 0; |
|
for (int i = size_ - 1; i >= 0; --i) { |
|
accumulator <<= 32; |
|
accumulator += words_[i]; |
|
// accumulator / divisor will never overflow an int32_t in this loop |
|
words_[i] = static_cast<uint32_t>(accumulator / divisor); |
|
accumulator = accumulator % divisor; |
|
} |
|
while (size_ > 0 && words_[size_ - 1] == 0) { |
|
--size_; |
|
} |
|
return static_cast<uint32_t>(accumulator); |
|
} |
|
|
|
// The number of elements in words_ that may carry significant values. |
|
// All elements beyond this point are 0. |
|
// |
|
// When size_ is 0, this BigUnsigned stores the value 0. |
|
// When size_ is nonzero, is *not* guaranteed that words_[size_ - 1] is |
|
// nonzero. This can occur due to overflow truncation. |
|
// In particular, x.size_ != y.size_ does *not* imply x != y. |
|
int size_; |
|
uint32_t words_[max_words]; |
|
}; |
|
|
|
// Compares two big integer instances. |
|
// |
|
// Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs. |
|
template <int N, int M> |
|
int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { |
|
int limit = (std::max)(lhs.size(), rhs.size()); |
|
for (int i = limit - 1; i >= 0; --i) { |
|
const uint32_t lhs_word = lhs.GetWord(i); |
|
const uint32_t rhs_word = rhs.GetWord(i); |
|
if (lhs_word < rhs_word) { |
|
return -1; |
|
} else if (lhs_word > rhs_word) { |
|
return 1; |
|
} |
|
} |
|
return 0; |
|
} |
|
|
|
template <int N, int M> |
|
bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { |
|
int limit = (std::max)(lhs.size(), rhs.size()); |
|
for (int i = 0; i < limit; ++i) { |
|
if (lhs.GetWord(i) != rhs.GetWord(i)) { |
|
return false; |
|
} |
|
} |
|
return true; |
|
} |
|
|
|
template <int N, int M> |
|
bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { |
|
return !(lhs == rhs); |
|
} |
|
|
|
template <int N, int M> |
|
bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { |
|
return Compare(lhs, rhs) == -1; |
|
} |
|
|
|
template <int N, int M> |
|
bool operator>(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { |
|
return rhs < lhs; |
|
} |
|
template <int N, int M> |
|
bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { |
|
return !(rhs < lhs); |
|
} |
|
template <int N, int M> |
|
bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { |
|
return !(lhs < rhs); |
|
} |
|
|
|
// Output operator for BigUnsigned, for testing purposes only. |
|
template <int N> |
|
std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num) { |
|
return os << num.ToString(); |
|
} |
|
|
|
// Explicit instantiation declarations for the sizes of BigUnsigned that we |
|
// are using. |
|
// |
|
// For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is |
|
// still bigger than an int128, and 84 is a large value we will want to use |
|
// in the from_chars implementation. |
|
// |
|
// Comments justifying the use of 84 belong in the from_chars implementation, |
|
// and will be added in a follow-up CL. |
|
extern template class BigUnsigned<4>; |
|
extern template class BigUnsigned<84>; |
|
|
|
} // namespace strings_internal |
|
ABSL_NAMESPACE_END |
|
} // namespace absl |
|
|
|
#endif // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
|
|
|