Merge pull request #10269 from terfendail:softdouble_round

pull/10284/head
Alexander Alekhin 7 years ago
commit e49febb70f
  1. 11
      modules/core/include/opencv2/core/softfloat.hpp
  2. 97
      modules/core/src/softfloat.cpp
  3. 126
      modules/core/test/test_math.cpp

@ -401,6 +401,9 @@ CV_EXPORTS int cvTrunc(const cv::softdouble& a);
CV_EXPORTS int cvRound(const cv::softfloat& a);
CV_EXPORTS int cvRound(const cv::softdouble& a);
/** @brief Rounds a number to the nearest 64-bit (long long) integer; ties are rounded to the even integer */
CV_EXPORTS int64_t cvRound64(const cv::softdouble& a);
/** @brief Rounds a number down to integer */
CV_EXPORTS int cvFloor(const cv::softfloat& a);
CV_EXPORTS int cvFloor(const cv::softdouble& a);
@ -430,12 +433,18 @@ template<> inline short saturate_cast<short>(softdouble a) { return (short)std::
template<> inline int saturate_cast<int>(softfloat a) { return cvRound(a); }
template<> inline int saturate_cast<int>(softdouble a) { return cvRound(a); }

/** @brief Saturate cast to long long integer
The softfloat overload goes through the int-returning cvRound(), so its result stays
within the int range; only the softdouble overload uses the 64-bit cvRound64().
*/
template<> inline int64_t saturate_cast<int64_t>(softfloat a) { return cvRound(a); }
template<> inline int64_t saturate_cast<int64_t>(softdouble a) { return cvRound64(a); }

/** @brief Saturate cast to unsigned integer and unsigned long long integer
We intentionally do not clip negative numbers, to make -1 become 0xffffffff etc.
*/
template<> inline unsigned saturate_cast<unsigned>(softfloat a) { return cvRound(a); }
template<> inline unsigned saturate_cast<unsigned>(softdouble a) { return cvRound(a); }
template<> inline uint64_t saturate_cast<uint64_t>(softfloat a) { return cvRound(a); }
template<> inline uint64_t saturate_cast<uint64_t>(softdouble a) { return cvRound64(a); }
/** @brief Min and Max functions */
inline softfloat min(const softfloat& a, const softfloat& b)
{
    // b is chosen only when a compares strictly greater; in every other case a is returned.
    if (a > b)
        return b;
    return a;
}
inline softdouble min(const softdouble& a, const softdouble& b)
{
    // Same selection rule as the softfloat overload.
    if (a > b)
        return b;
    return a;
}

@ -183,6 +183,7 @@ static bool f32_lt( float32_t, float32_t );
| 64-bit (double-precision) floating-point operations.
*----------------------------------------------------------------------------*/
static int_fast32_t f64_to_i32( float64_t, uint_fast8_t, bool );
static int_fast64_t f64_to_i64( float64_t, uint_fast8_t, bool );
static int_fast32_t f64_to_i32_r_minMag( float64_t, bool );
static float32_t f64_to_f32( float64_t );
static float64_t f64_roundToInt( float64_t, uint_fast8_t, bool );
@ -258,6 +259,8 @@ int cvRound(const cv::softdouble& a) { return cv::f64_to_i32(a, cv::round_near_e
/* Floor of a softdouble: 32-bit integer conversion using the round_min mode. */
int cvFloor(const cv::softdouble& a)
{
    const bool exact = false;
    return cv::f64_to_i32(a, cv::round_min, exact);
}

/* Ceiling of a softdouble: 32-bit integer conversion using the round_max mode. */
int cvCeil (const cv::softdouble& a)
{
    const bool exact = false;
    return cv::f64_to_i32(a, cv::round_max, exact);
}

/* Nearest 64-bit integer of a softdouble: conversion using the round_near_even mode. */
int64_t cvRound64(const cv::softdouble& a)
{
    const bool exact = false;
    return cv::f64_to_i64(a, cv::round_near_even, exact);
}
namespace cv
{
/* Conversion from softdouble to softfloat; the work is delegated to f64_to_f32(). */
softdouble::operator softfloat() const { return f64_to_f32(*this); }
@ -468,6 +471,7 @@ static float32_t softfloat_mulAddF32(uint_fast32_t, uint_fast32_t, uint_fast32_t
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
static int_fast64_t softfloat_roundToI64( bool, uint_fast64_t, uint_fast64_t, uint_fast8_t, bool);
struct exp16_sig64 { int_fast16_t exp; uint_fast64_t sig; };
static struct exp16_sig64 softfloat_normSubnormalF64Sig( uint_fast64_t );
@ -2026,6 +2030,59 @@ static int_fast32_t f64_to_i32( float64_t a, uint_fast8_t roundingMode, bool exa
return softfloat_roundToI32( sign, sig, roundingMode, exact );
}
/*----------------------------------------------------------------------------
| Converts the 64-bit floating-point value `a' to a 64-bit signed integer.
| `roundingMode' and `exact' are forwarded to softfloat_roundToI64() whenever
| the value has fractional bits to round. Values whose magnitude does not fit
| raise flag_invalid and yield i64_fromNegOverflow / i64_fromPosOverflow
| depending on the sign; NaN handling is selected at compile time from the
| i64_fromNaN / i64_from*Overflow constants.
*----------------------------------------------------------------------------*/
static int_fast64_t f64_to_i64(float64_t a, uint_fast8_t roundingMode, bool exact )
{
    uint_fast64_t uiA;
    bool sign;
    int_fast16_t exp;
    uint_fast64_t sig;
    int_fast16_t shiftDist;

    /*------------------------------------------------------------------------
    | Split the raw bits into sign, biased exponent and fraction.
    *------------------------------------------------------------------------*/
    uiA = a.v;
    sign = signF64UI(uiA);
    exp = expF64UI(uiA);
    sig = fracF64UI(uiA);

    /*------------------------------------------------------------------------
    | NaN: either fold it into one of the overflow cases by forcing the sign,
    | or return the dedicated NaN result.
    *------------------------------------------------------------------------*/
#if (i64_fromNaN != i64_fromPosOverflow) || (i64_fromNaN != i64_fromNegOverflow)
    if ((exp == 0x7FF) && sig) {
#if (i64_fromNaN == i64_fromPosOverflow)
        sign = 0;
#elif (i64_fromNaN == i64_fromNegOverflow)
        sign = 1;
#else
        raiseFlags(flag_invalid);
        return i64_fromNaN;
#endif
    }
#endif

    /*------------------------------------------------------------------------
    | Restore the implicit leading bit for normal numbers and compute how far
    | the significand has to be shifted right to become an integer.
    *------------------------------------------------------------------------*/
    if (exp) sig |= UINT64_C(0x0010000000000000);
    shiftDist = 0x433 - exp;
    if (shiftDist <= 0) {
        /* The value is already an integer and must be shifted left.
           Check the range BEFORE shifting: a shift count of 64 or more is
           undefined behaviour, and the exponent can make -shiftDist far
           larger than that (e.g. for infinities). */
        if (shiftDist < -11)
        {
            raiseFlags(flag_invalid);
            return sign ? i64_fromNegOverflow : i64_fromPosOverflow;
        }
        uint_fast64_t z = sig << -shiftDist;
        /* Bit 63 set means the magnitude is at least 2^63.
           NOTE(review): -2^63 also takes this path, so the result relies on
           i64_fromNegOverflow being INT64_MIN -- confirm. */
        if (z & UINT64_C(0x8000000000000000))
        {
            raiseFlags(flag_invalid);
            return sign ? i64_fromNegOverflow : i64_fromPosOverflow;
        }
        return sign ? -(int_fast64_t)z : (int_fast64_t)z;
    }
    else {
        if (shiftDist < 64)
            /* Integer part in the first operand, discarded bits left-aligned
               in the second one so the rounding helper can inspect them. */
            return
                softfloat_roundToI64(
                    sign, sig >> shiftDist, sig << (-shiftDist & 63), roundingMode, exact);
        else
            /* No integer part at all: pass the whole significand as the extra
               bits (shift of exactly 64) or just a "non-zero" sticky bit. */
            return
                softfloat_roundToI64(
                    sign, 0, (shiftDist == 64) ? sig : (sig != 0), roundingMode, exact);
    }
}
static int_fast32_t f64_to_i32_r_minMag( float64_t a, bool exact )
{
uint_fast64_t uiA;
@ -3076,6 +3133,46 @@ static int_fast32_t
return sign ? i32_fromNegOverflow : i32_fromPosOverflow;
}
static int_fast64_t
softfloat_roundToI64(
bool sign, uint_fast64_t sig, uint_fast64_t sigExtra, uint_fast8_t roundingMode, bool exact )
{
bool roundNearEven, doIncrement;
union { uint64_t ui; int64_t i; } uZ;
int_fast64_t z;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
roundNearEven = (roundingMode == round_near_even);
doIncrement = (UINT64_C(0x8000000000000000) <= sigExtra);
if (!roundNearEven && (roundingMode != round_near_maxMag)) {
doIncrement =
(roundingMode
== (sign ? round_min : round_max))
&& sigExtra;
}
if (doIncrement) {
++sig;
if (!sig) goto invalid;
sig &=
~(uint_fast64_t)
(!(sigExtra & UINT64_C(0x7FFFFFFFFFFFFFFF))
& roundNearEven);
}
uZ.ui = sign ? (~sig + 1) : sig;
z = uZ.i;
if (z && ((z < 0) ^ sign)) goto invalid;
if (exact && sigExtra) {
raiseFlags(flag_inexact);
}
return z;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
invalid:
raiseFlags(flag_invalid);
return sign ? i64_fromNegOverflow : i64_fromPosOverflow;
}
static struct uint128
softfloat_shiftRightJam128( uint64_t a64, uint64_t a0, uint_fast32_t dist )
{

@ -3736,4 +3736,130 @@ TEST(Core_SoftFloat, sincos64)
}
}
// Checks cvRound(), cvRound64() and the integer saturate_cast<> specializations
// for softdouble against a table of raw IEEE-754 double bit patterns.
TEST(Core_SoftFloat, CvRound)
{
// Raw table: input bit pattern, expected 64-bit result, expected 32-bit result.
struct
{
uint64_t inVal;
int64_t out64;
int32_t out32;
} _values[] =
{
{ 0x0123456789abcdefU, 0, 0 }, // 3.51270056408850369812238561681E-303
{ 0x0000000000000000U, 0, 0 }, // 0
{ 0x8000000000000000U, 0, 0 }, // -0
{ 0x000123456789abcdU, 0, 0 }, // 1.5822747438273385725152200433E-309
{ 0x800123456789abcdU, 0, 0 }, // -1.5822747438273385725152200433E-309
{ 0x7ff0000000000000U, INT64_MAX, INT32_MAX }, // +inf
{ 0xfff0000000000000U, INT64_MIN, INT32_MIN }, // -inf
{ 0x7ff0000000000001U, INT64_MAX, INT32_MAX }, // nan(casts to maximum value)
{ 0xfff0000000000001U, INT64_MAX, INT32_MAX }, // nan(casts to maximum value)
{ 0x7ffa5a5a5a5a5a5aU, INT64_MAX, INT32_MAX }, // nan(casts to maximum value)
{ 0xfffa5a5a5a5a5a5aU, INT64_MAX, INT32_MAX }, // nan(casts to maximum value)
{ 0x7fe123456789abcdU, INT64_MAX, INT32_MAX }, // 9.627645455595956656406699747E307
{ 0xffe123456789abcdU, INT64_MIN, INT32_MIN }, // -9.627645455595956656406699747E307
{ 0x43ffffffffffffffU, INT64_MAX, INT32_MAX }, // (2^53-1)*2^12
{ 0xc3ffffffffffffffU, INT64_MIN, INT32_MIN }, // -(2^53-1)*2^12
{ 0x43f0000000000000U, INT64_MAX, INT32_MAX }, // 2^64
{ 0xc3f0000000000000U, INT64_MIN, INT32_MIN }, // -2^64
{ 0x43efffffffffffffU, INT64_MAX, INT32_MAX }, // (2^53-1)*2^11
{ 0xc3efffffffffffffU, INT64_MIN, INT32_MIN }, // -(2^53-1)*2^11
{ 0x43e0000000000000U, INT64_MAX, INT32_MAX }, // 2^63
{ 0xc3e0000000000000U, -0x7fffffffffffffff-1, INT32_MIN }, // -2^63
{ 0x43dfffffffffffffU, 0x7ffffffffffffc00, INT32_MAX }, // (2^53-1)*2^10
{ 0xc3dfffffffffffffU, -0x7ffffffffffffc00, INT32_MIN }, // -(2^53-1)*2^10
{ 0x433fffffffffffffU, 0x1fffffffffffff, INT32_MAX }, // (2^53-1)
{ 0xc33fffffffffffffU, -0x1fffffffffffff, INT32_MIN }, // -(2^53-1)
{ 0x432fffffffffffffU, 0x10000000000000, INT32_MAX }, // (2^52-1) + 0.5
{ 0xc32fffffffffffffU, -0x10000000000000, INT32_MIN }, // -(2^52-1) - 0.5
{ 0x431fffffffffffffU, 0x8000000000000, INT32_MAX }, // (2^51-1) + 0.75
{ 0xc31fffffffffffffU, -0x8000000000000, INT32_MIN }, // -(2^51-1) - 0.75
{ 0x431ffffffffffffeU, 0x8000000000000, INT32_MAX }, // (2^51-1) + 0.5
{ 0xc31ffffffffffffeU, -0x8000000000000, INT32_MIN }, // -(2^51-1) - 0.5
{ 0x431ffffffffffffdU, 0x7ffffffffffff, INT32_MAX }, // (2^51-1) + 0.25
{ 0xc31ffffffffffffdU, -0x7ffffffffffff, INT32_MIN }, // -(2^51-1) - 0.25
{ 0x41f0000000000000U, 0x100000000, INT32_MAX }, // 2^32 = 4294967296
{ 0xc1f0000000000000U, -0x100000000, INT32_MIN }, // -2^32 = -4294967296
{ 0x41efffffffffffffU, 0x100000000, INT32_MAX }, // 4294967295.99999952316284179688
{ 0xc1efffffffffffffU, -0x100000000, INT32_MIN }, // -4294967295.99999952316284179688
{ 0x41effffffff00000U, 0x100000000, INT32_MAX }, // (2^32-1) + 0.5 = 4294967295.5
{ 0xc1effffffff00000U, -0x100000000, INT32_MIN }, // -(2^32-1) - 0.5 = -4294967295.5
{ 0x41efffffffe00000U, 0xffffffffll, INT32_MAX }, // (2^32-1)
{ 0xc1efffffffe00000U, -0xffffffffll, INT32_MIN }, // -(2^32-1)
{ 0x41e0000000000000U, 0x80000000ll, INT32_MAX }, // 2^31 = 2147483648
{ 0xc1e0000000000000U, -0x80000000ll, -0x7fffffff-1 }, // -2^31 = -2147483648
{ 0x41dfffffffffffffU, 0x80000000ll, INT32_MAX }, // 2147483647.99999976158142089844
{ 0xc1dfffffffffffffU, -0x80000000ll, -0x7fffffff-1 }, // -2147483647.99999976158142089844
{ 0x41dffffffff00000U, 0x80000000ll, INT32_MAX }, // (2^31-1) + 0.75
{ 0xc1dffffffff00000U, -0x80000000ll, -0x7fffffff-1 }, // -(2^31-1) - 0.75
{ 0x41dfffffffe00001U, 0x80000000ll, INT32_MAX }, // (2^31-1) + 0.5 + 2^-22
{ 0xc1dfffffffe00001U, -0x80000000ll, -0x7fffffff-1 }, // -(2^31-1) - 0.5 - 2^-22
{ 0x41dfffffffe00000U, 0x80000000ll, INT32_MAX }, // (2^31-1) + 0.5
{ 0xc1dfffffffe00000U, -0x80000000ll, -0x7fffffff-1 }, // -(2^31-1) - 0.5
{ 0x41dfffffffdfffffU, 0x7fffffff, 0x7fffffff }, // (2^31-1) + 0.5 - 2^-22
{ 0xc1dfffffffdfffffU, -0x7fffffff, -0x7fffffff }, // -(2^31-1) - 0.5 + 2^-22
{ 0x41dfffffffd00000U, 0x7fffffff, 0x7fffffff }, // (2^31-1) + 0.25
{ 0xc1dfffffffd00000U, -0x7fffffff, -0x7fffffff }, // -(2^31-1) - 0.25
{ 0x41dfffffffc00000U, 0x7fffffff, 0x7fffffff }, // (2^31-1)
{ 0xc1dfffffffc00000U, -0x7fffffff, -0x7fffffff }, // -(2^31-1)
{ 0x41d0000000000000U, 0x40000000, 0x40000000 }, // 2^30 = 1073741824
{ 0xc1d0000000000000U, -0x40000000, -0x40000000 }, // -2^30 = -1073741824
{ 0x4006000000000000U, 3, 3 }, // 2.75
{ 0xc006000000000000U, -3, -3 }, // -2.75
{ 0x4004000000000001U, 3, 3 }, // 2.5 + 2^-51
{ 0xc004000000000001U, -3, -3 }, // -2.5 - 2^-51
{ 0x4004000000000000U, 2, 2 }, // 2.5
{ 0xc004000000000000U, -2, -2 }, // -2.5
{ 0x4003ffffffffffffU, 2, 2 }, // 2.5 - 2^-51
{ 0xc003ffffffffffffU, -2, -2 }, // -2.5 + 2^-51
{ 0x4002000000000000U, 2, 2 }, // 2.25
{ 0xc002000000000000U, -2, -2 }, // -2.25
{ 0x3ffc000000000000U, 2, 2 }, // 1.75
{ 0xbffc000000000000U, -2, -2 }, // -1.75
{ 0x3ff8000000000001U, 2, 2 }, // 1.5 + 2^-52
{ 0xbff8000000000001U, -2, -2 }, // -1.5 - 2^-52
{ 0x3ff8000000000000U, 2, 2 }, // 1.5
{ 0xbff8000000000000U, -2, -2 }, // -1.5
{ 0x3ff7ffffffffffffU, 1, 1 }, // 1.5 - 2^-52
{ 0xbff7ffffffffffffU, -1, -1 }, // -1.5 + 2^-52
{ 0x3ff4000000000000U, 1, 1 }, // 1.25
{ 0xbff4000000000000U, -1, -1 }, // -1.25
{ 0x3fe8000000000000U, 1, 1 }, // 0.75
{ 0xbfe8000000000000U, -1, -1 }, // -0.75
{ 0x3fe0000000000001U, 1, 1 }, // 0.5 + 2^-53
{ 0xbfe0000000000001U, -1, -1 }, // -0.5 - 2^-53
{ 0x3fe0000000000000U, 0, 0 }, // 0.5
{ 0xbfe0000000000000U, 0, 0 }, // -0.5
{ 0x3fd8000000000000U, 0, 0 }, // 0.375
{ 0xbfd8000000000000U, 0, 0 }, // -0.375
{ 0x3fd0000000000000U, 0, 0 }, // 0.25
{ 0xbfd0000000000000U, 0, 0 }, // -0.25
{ 0x0ff123456789abcdU, 0, 0 }, // 6.89918601543515033558134828315E-232
{ 0x8ff123456789abcdU, 0, 0 } // -6.89918601543515033558134828315E-232
};
// Typed view of the same table, with the input read as a softdouble.
// NOTE(review): the pointer cast presumes softdouble has the same size and
// layout as the uint64_t bit pattern -- confirm against the softdouble definition.
struct testvalues
{
softdouble inVal;
int64_t out64;
int32_t out32;
} *values = (testvalues*)_values;
for (int i = 0, maxi = sizeof(_values) / sizeof(_values[0]); i < maxi; i++)
{
// 64-bit paths: direct rounding plus signed and unsigned saturate_cast.
EXPECT_EQ(values[i].out64, cvRound64(values[i].inVal));
EXPECT_EQ(values[i].out64, saturate_cast<int64_t>(values[i].inVal));
EXPECT_EQ((uint64_t)(values[i].out64), saturate_cast<uint64_t>(values[i].inVal));
// 32-bit paths: the unsigned expectations are the signed result converted to unsigned.
EXPECT_EQ(values[i].out32, cvRound(values[i].inVal));
EXPECT_EQ(values[i].out32, saturate_cast<int32_t>(values[i].inVal));
EXPECT_EQ((uint32_t)(values[i].out32), saturate_cast<uint32_t>(values[i].inVal));
}
}
/* End of file. */

Loading…
Cancel
Save