diff --git a/modules/core/include/opencv2/core/hal/intrin_avx.hpp b/modules/core/include/opencv2/core/hal/intrin_avx.hpp index 6e415f1e75..c7ce1e3d33 100644 --- a/modules/core/include/opencv2/core/hal/intrin_avx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_avx.hpp @@ -3167,12 +3167,18 @@ inline void v_pack_store(hfloat* ptr, const v_float32x8& a) inline void v256_cleanup() { _mm256_zeroall(); } #include "intrin_math.hpp" -inline v_float32x8 v_exp(v_float32x8 x) { return v_exp_default_32f(x); } -inline v_float32x8 v_log(v_float32x8 x) { return v_log_default_32f(x); } -inline v_float32x8 v_erf(v_float32x8 x) { return v_erf_default_32f(x); } - -inline v_float64x4 v_exp(v_float64x4 x) { return v_exp_default_64f(x); } -inline v_float64x4 v_log(v_float64x4 x) { return v_log_default_64f(x); } +inline v_float32x8 v_exp(const v_float32x8& x) { return v_exp_default_32f(x); } +inline v_float32x8 v_log(const v_float32x8& x) { return v_log_default_32f(x); } +inline void v_sincos(const v_float32x8& x, v_float32x8& s, v_float32x8& c) { v_sincos_default_32f(x, s, c); } +inline v_float32x8 v_sin(const v_float32x8& x) { return v_sin_default_32f(x); } +inline v_float32x8 v_cos(const v_float32x8& x) { return v_cos_default_32f(x); } +inline v_float32x8 v_erf(const v_float32x8& x) { return v_erf_default_32f(x); } + +inline v_float64x4 v_exp(const v_float64x4& x) { return v_exp_default_64f(x); } +inline v_float64x4 v_log(const v_float64x4& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64x4& x, v_float64x4& s, v_float64x4& c) { v_sincos_default_64f(x, s, c); } +inline v_float64x4 v_sin(const v_float64x4& x) { return v_sin_default_64f(x); } +inline v_float64x4 v_cos(const v_float64x4& x) { return v_cos_default_64f(x); } CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END diff --git a/modules/core/include/opencv2/core/hal/intrin_avx512.hpp b/modules/core/include/opencv2/core/hal/intrin_avx512.hpp index 24007f4d16..077b4d17a7 100644 --- a/modules/core/include/opencv2/core/hal/intrin_avx512.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_avx512.hpp @@ -3079,12 +3079,18 @@ inline int v_scan_forward(const v_float64x8& a) { return trailingZeros32(v_signm inline void v512_cleanup() { _mm256_zeroall(); } #include "intrin_math.hpp" -inline v_float32x16 v_exp(v_float32x16 x) { return v_exp_default_32f(x); } -inline v_float32x16 v_log(v_float32x16 x) { return v_log_default_32f(x); } -inline v_float32x16 v_erf(v_float32x16 x) { return v_erf_default_32f(x); } - -inline v_float64x8 v_exp(v_float64x8 x) { return v_exp_default_64f(x); } -inline v_float64x8 v_log(v_float64x8 x) { return v_log_default_64f(x); } +inline v_float32x16 v_exp(const v_float32x16& x) { return v_exp_default_32f(x); } +inline v_float32x16 v_log(const v_float32x16& x) { return v_log_default_32f(x); } +inline void v_sincos(const v_float32x16& x, v_float32x16& s, v_float32x16& c) { v_sincos_default_32f(x, s, c); } +inline v_float32x16 v_sin(const v_float32x16& x) { return v_sin_default_32f(x); } +inline v_float32x16 v_cos(const v_float32x16& x) { return v_cos_default_32f(x); } +inline v_float32x16 v_erf(const v_float32x16& x) { return v_erf_default_32f(x); } + +inline v_float64x8 v_exp(const v_float64x8& x) { return v_exp_default_64f(x); } +inline v_float64x8 v_log(const v_float64x8& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64x8& x, v_float64x8& s, v_float64x8& c) { v_sincos_default_64f(x, s, c); } +inline v_float64x8 v_sin(const v_float64x8& x) { return v_sin_default_64f(x); } +inline v_float64x8 v_cos(const v_float64x8& x) { return v_cos_default_64f(x); } CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END diff --git a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp index 653f51b145..3e5d484145 100644 --- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp @@ -264,7 +264,7 @@ Most of these operations return only one value. ### Other math - Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude, @ref v_exp, @ref v_log, - @ref v_erf + @ref v_erf, @ref v_sin, @ref v_cos - Absolute values: @ref v_abs, @ref v_absdiff, @ref v_absdiffs ### Conversions @@ -366,6 +366,7 @@ Floating point: |broadcast_element | x | | |exp | x | x | |log | x | x | +|sin, cos | x | x | @{ */ @@ -745,10 +746,41 @@ OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp) */ OPENCV_HAL_IMPL_MATH_FUNC(v_erf, std::erf, _Tp) -//! @cond IGNORED +/** + * @brief Compute sine \f$ sin(x) \f$ and cosine \f$ cos(x) \f$ of elements at the same time + * + * Only for floating point types. Core implementation steps: + * 1. Input Normalization: Scale the periodicity from 2π to 4 and reduce the angle to the range \f$ [0, \frac{\pi}{4}] \f$ using periodicity and trigonometric identities. + * 2. Polynomial Approximation for \f$ sin(x) \f$ and \f$ cos(x) \f$: + * - For float16 and float32, use a Taylor series with 4 terms for sine and 5 terms for cosine. + * - For float64, use a Taylor series with 7 terms for sine and 8 terms for cosine. + * 3. Select Results: select and convert the final sine and cosine values for the original input angle. + * + * @note The precision of the calculation depends on the implementation and the data type of the input vector. + */ +template +inline void v_sincos(const v_reg<_Tp, n>& x, v_reg<_Tp, n>& s, v_reg<_Tp, n>& c) +{ + for( int i = 0; i < n; i++ ) + { + s.s[i] = std::sin(x.s[i]); + c.s[i] = std::cos(x.s[i]); + } +} + +/** + * @brief Sine \f$ sin(x) \f$ of elements + * + * Only for floating point types. Core implementation the same as @ref v_sincos. + */ OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp) + +/** + * @brief Cosine \f$ cos(x) \f$ of elements + * + * Only for floating point types. Core implementation the same as @ref v_sincos. + */ OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp) -//! @endcond /** @brief Absolute value of elements diff --git a/modules/core/include/opencv2/core/hal/intrin_lasx.hpp b/modules/core/include/opencv2/core/hal/intrin_lasx.hpp index 1163e65748..68d08b2ef4 100644 --- a/modules/core/include/opencv2/core/hal/intrin_lasx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_lasx.hpp @@ -3014,12 +3014,18 @@ inline void v_pack_store(hfloat* ptr, const v_float32x8& a) inline void v256_cleanup() {} #include "intrin_math.hpp" -inline v_float32x8 v_exp(v_float32x8 x) { return v_exp_default_32f(x); } -inline v_float32x8 v_log(v_float32x8 x) { return v_log_default_32f(x); } -inline v_float32x8 v_erf(v_float32x8 x) { return v_erf_default_32f(x); } - -inline v_float64x4 v_exp(v_float64x4 x) { return v_exp_default_64f(x); } -inline v_float64x4 v_log(v_float64x4 x) { return v_log_default_64f(x); } +inline v_float32x8 v_exp(const v_float32x8& x) { return v_exp_default_32f(x); } +inline v_float32x8 v_log(const v_float32x8& x) { return v_log_default_32f(x); } +inline void v_sincos(const v_float32x8& x, v_float32x8& s, v_float32x8& c) { v_sincos_default_32f(x, s, c); } +inline v_float32x8 v_sin(const v_float32x8& x) { return v_sin_default_32f(x); } +inline v_float32x8 v_cos(const v_float32x8& x) { return v_cos_default_32f(x); } +inline v_float32x8 v_erf(const v_float32x8& x) { return v_erf_default_32f(x); } + +inline v_float64x4 v_exp(const v_float64x4& x) { return v_exp_default_64f(x); } +inline v_float64x4 v_log(const v_float64x4& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64x4& x, v_float64x4& s, v_float64x4& c) { v_sincos_default_64f(x, s, c); } +inline v_float64x4 v_sin(const v_float64x4& x) { return v_sin_default_64f(x); } +inline v_float64x4 v_cos(const v_float64x4& x) { return v_cos_default_64f(x); } CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END diff --git a/modules/core/include/opencv2/core/hal/intrin_lsx.hpp b/modules/core/include/opencv2/core/hal/intrin_lsx.hpp index b2aeb4fcc2..a2f23d6abe 100644 --- a/modules/core/include/opencv2/core/hal/intrin_lsx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_lsx.hpp @@ -2524,12 +2524,18 @@ inline void v_pack_store(hfloat* ptr, const v_float32x4& a) inline void v_cleanup() {} #include "intrin_math.hpp" -inline v_float32x4 v_exp(v_float32x4 x) { return v_exp_default_32f(x); } -inline v_float32x4 v_log(v_float32x4 x) { return v_log_default_32f(x); } -inline v_float32x4 v_erf(v_float32x4 x) { return v_erf_default_32f(x); } - -inline v_float64x2 v_exp(v_float64x2 x) { return v_exp_default_64f(x); } -inline v_float64x2 v_log(v_float64x2 x) { return v_log_default_64f(x); } +inline v_float32x4 v_exp(const v_float32x4& x) { return v_exp_default_32f(x); } +inline v_float32x4 v_log(const v_float32x4& x) { return v_log_default_32f(x); } +inline void v_sincos(const v_float32x4& x, v_float32x4& s, v_float32x4& c) { v_sincos_default_32f(x, s, c); } +inline v_float32x4 v_sin(const v_float32x4& x) { return v_sin_default_32f(x); } +inline v_float32x4 v_cos(const v_float32x4& x) { return v_cos_default_32f(x); } +inline v_float32x4 v_erf(const v_float32x4& x) { return v_erf_default_32f(x); } + +inline v_float64x2 v_exp(const v_float64x2& x) { return v_exp_default_64f(x); } +inline v_float64x2 v_log(const v_float64x2& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64x2& x, v_float64x2& s, v_float64x2& c) { v_sincos_default_64f(x, s, c); } +inline v_float64x2 v_sin(const v_float64x2& x) { return v_sin_default_64f(x); } +inline v_float64x2 v_cos(const v_float64x2& x) { return v_cos_default_64f(x); } CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END diff --git a/modules/core/include/opencv2/core/hal/intrin_math.hpp b/modules/core/include/opencv2/core/hal/intrin_math.hpp index 36aa90902e..b7e649e744 100644 --- a/modules/core/include/opencv2/core/hal/intrin_math.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_math.hpp @@ -405,6 +405,248 @@ inline _TpVec64F v_log_default_64f(const _TpVec64F &x) { } //! @} +//! @name Sine and Cosine +//! @{ +template +inline void v_sincos_default_16f(const _TpVec16F &x, _TpVec16F &ysin, _TpVec16F &ycos) { + const _TpVec16F v_cephes_FOPI = v_setall_<_TpVec16F>(hfloat(1.27323954473516f)); // 4 / M_PI + const _TpVec16F v_minus_DP1 = v_setall_<_TpVec16F>(hfloat(-0.78515625f)); + const _TpVec16F v_minus_DP2 = v_setall_<_TpVec16F>(hfloat(-2.4187564849853515625E-4f)); + const _TpVec16F v_minus_DP3 = v_setall_<_TpVec16F>(hfloat(-3.77489497744594108E-8f)); + const _TpVec16F v_sincof_p0 = v_setall_<_TpVec16F>(hfloat(-1.9515295891E-4f)); + const _TpVec16F v_sincof_p1 = v_setall_<_TpVec16F>(hfloat(8.3321608736E-3f)); + const _TpVec16F v_sincof_p2 = v_setall_<_TpVec16F>(hfloat(-1.6666654611E-1f)); + const _TpVec16F v_coscof_p0 = v_setall_<_TpVec16F>(hfloat(2.443315711809948E-5f)); + const _TpVec16F v_coscof_p1 = v_setall_<_TpVec16F>(hfloat(-1.388731625493765E-3f)); + const _TpVec16F v_coscof_p2 = v_setall_<_TpVec16F>(hfloat(4.166664568298827E-2f)); + const _TpVec16F v_nan = v_reinterpret_as_f16(v_setall_<_TpVec16S>((short)0x7e00)); + const _TpVec16F v_neg_zero = v_setall_<_TpVec16F>(hfloat(-0.f)); + + _TpVec16F _vx, _vy, sign_mask_sin, sign_mask_cos; + _TpVec16S emm2; + + sign_mask_sin = v_lt(x, v_setzero_<_TpVec16F>()); + _vx = v_abs(x); + _vy = v_mul(_vx, v_cephes_FOPI); + + emm2 = v_trunc(_vy); + emm2 = v_add(emm2, v_setall_<_TpVec16S>((short)1)); + emm2 = v_and(emm2, v_setall_<_TpVec16S>((short)~1)); + _vy = v_cvt_f16(emm2); + + _TpVec16F poly_mask = v_reinterpret_as_f16(v_eq(v_and(emm2, v_setall_<_TpVec16S>((short)2)), v_setall_<_TpVec16S>((short)0))); + + _vx = v_fma(_vy, v_minus_DP1, _vx); + _vx = v_fma(_vy, v_minus_DP2, _vx); + _vx = v_fma(_vy, v_minus_DP3, _vx); + + sign_mask_sin = v_xor(sign_mask_sin, v_reinterpret_as_f16(v_eq(v_and(emm2, v_setall_<_TpVec16S>((short)4)), v_setall_<_TpVec16S>((short)0)))); + sign_mask_cos = v_reinterpret_as_f16(v_eq(v_and(v_sub(emm2, v_setall_<_TpVec16S>((short)2)), v_setall_<_TpVec16S>((short)4)), v_setall_<_TpVec16S>((short)0))); + + _TpVec16F _vxx = v_mul(_vx, _vx); + _TpVec16F y1, y2; + + y1 = v_fma(v_coscof_p0, _vxx, v_coscof_p1); + y1 = v_fma(y1, _vxx, v_coscof_p2); + y1 = v_fma(y1, _vxx, v_setall_<_TpVec16F>(hfloat(-0.5f))); + y1 = v_fma(y1, _vxx, v_setall_<_TpVec16F>(hfloat(1.f))); + + y2 = v_fma(v_sincof_p0, _vxx, v_sincof_p1); + y2 = v_fma(y2, _vxx, v_sincof_p2); + y2 = v_mul(y2, _vxx); + y2 = v_fma(y2, _vx, _vx); + + ysin = v_select(poly_mask, y2, y1); + ycos = v_select(poly_mask, y1, y2); + ysin = v_select(sign_mask_sin, ysin, v_xor(v_neg_zero, ysin)); + ycos = v_select(sign_mask_cos, v_xor(v_neg_zero, ycos), ycos); + + // sincos(NAN) -> NAN, sincos(±INF) -> NAN + _TpVec16F mask_inf = v_eq(_vx, v_reinterpret_as_f16(v_setall_<_TpVec16S>((short)0x7c00))); + _TpVec16F mask_nan = v_or(mask_inf, v_ne(x, x)); + ysin = v_select(mask_nan, v_nan, ysin); + ycos = v_select(mask_nan, v_nan, ycos); +} + +template +inline _TpVec16F v_sin_default_16f(const _TpVec16F &x) { + _TpVec16F ysin, ycos; + v_sincos_default_16f<_TpVec16F, _TpVec16S>(x, ysin, ycos); + return ysin; +} + +template +inline _TpVec16F v_cos_default_16f(const _TpVec16F &x) { + _TpVec16F ysin, ycos; + v_sincos_default_16f<_TpVec16F, _TpVec16S>(x, ysin, ycos); + return ycos; +} + + +template +inline void v_sincos_default_32f(const _TpVec32F &x, _TpVec32F &ysin, _TpVec32F &ycos) { + const _TpVec32F v_cephes_FOPI = v_setall_<_TpVec32F>(1.27323954473516f); // 4 / M_PI + const _TpVec32F v_minus_DP1 = v_setall_<_TpVec32F>(-0.78515625f); + const _TpVec32F v_minus_DP2 = v_setall_<_TpVec32F>(-2.4187564849853515625E-4f); + const _TpVec32F v_minus_DP3 = v_setall_<_TpVec32F>(-3.77489497744594108E-8f); + const _TpVec32F v_sincof_p0 = v_setall_<_TpVec32F>(-1.9515295891E-4f); + const _TpVec32F v_sincof_p1 = v_setall_<_TpVec32F>(8.3321608736E-3f); + const _TpVec32F v_sincof_p2 = v_setall_<_TpVec32F>(-1.6666654611E-1f); + const _TpVec32F v_coscof_p0 = v_setall_<_TpVec32F>(2.443315711809948E-5f); + const _TpVec32F v_coscof_p1 = v_setall_<_TpVec32F>(-1.388731625493765E-3f); + const _TpVec32F v_coscof_p2 = v_setall_<_TpVec32F>(4.166664568298827E-2f); + const _TpVec32F v_nan = v_reinterpret_as_f32(v_setall_<_TpVec32S>((int)0x7fc00000)); + const _TpVec32F v_neg_zero = v_setall_<_TpVec32F>(-0.f); + + _TpVec32F _vx, _vy, sign_mask_sin, sign_mask_cos; + _TpVec32S emm2; + + sign_mask_sin = v_lt(x, v_setzero_<_TpVec32F>()); + _vx = v_abs(x); + _vy = v_mul(_vx, v_cephes_FOPI); + + emm2 = v_trunc(_vy); + emm2 = v_add(emm2, v_setall_<_TpVec32S>(1)); + emm2 = v_and(emm2, v_setall_<_TpVec32S>(~1)); + _vy = v_cvt_f32(emm2); + + _TpVec32F poly_mask = v_reinterpret_as_f32(v_eq(v_and(emm2, v_setall_<_TpVec32S>(2)), v_setall_<_TpVec32S>(0))); + + _vx = v_fma(_vy, v_minus_DP1, _vx); + _vx = v_fma(_vy, v_minus_DP2, _vx); + _vx = v_fma(_vy, v_minus_DP3, _vx); + + sign_mask_sin = v_xor(sign_mask_sin, v_reinterpret_as_f32(v_eq(v_and(emm2, v_setall_<_TpVec32S>(4)), v_setall_<_TpVec32S>(0)))); + sign_mask_cos = v_reinterpret_as_f32(v_eq(v_and(v_sub(emm2, v_setall_<_TpVec32S>(2)), v_setall_<_TpVec32S>(4)), v_setall_<_TpVec32S>(0))); + + _TpVec32F _vxx = v_mul(_vx, _vx); + _TpVec32F y1, y2; + + y1 = v_fma(v_coscof_p0, _vxx, v_coscof_p1); + y1 = v_fma(y1, _vxx, v_coscof_p2); + y1 = v_fma(y1, _vxx, v_setall_<_TpVec32F>(-0.5f)); + y1 = v_fma(y1, _vxx, v_setall_<_TpVec32F>(1.f)); + + y2 = v_fma(v_sincof_p0, _vxx, v_sincof_p1); + y2 = v_fma(y2, _vxx, v_sincof_p2); + y2 = v_mul(y2, _vxx); + y2 = v_fma(y2, _vx, _vx); + + ysin = v_select(poly_mask, y2, y1); + ycos = v_select(poly_mask, y1, y2); + ysin = v_select(sign_mask_sin, ysin, v_xor(v_neg_zero, ysin)); + ycos = v_select(sign_mask_cos, v_xor(v_neg_zero, ycos), ycos); + + // sincos(NAN) -> NAN, sincos(±INF) -> NAN + _TpVec32F mask_inf = v_eq(_vx, v_reinterpret_as_f32(v_setall_<_TpVec32S>((int)0x7f800000))); + _TpVec32F mask_nan = v_or(mask_inf, v_ne(x, x)); + ysin = v_select(mask_nan, v_nan, ysin); + ycos = v_select(mask_nan, v_nan, ycos); +} + +template +inline _TpVec32F v_sin_default_32f(const _TpVec32F &x) { + _TpVec32F ysin, ycos; + v_sincos_default_32f<_TpVec32F, _TpVec32S>(x, ysin, ycos); + return ysin; +} + +template +inline _TpVec32F v_cos_default_32f(const _TpVec32F &x) { + _TpVec32F ysin, ycos; + v_sincos_default_32f<_TpVec32F, _TpVec32S>(x, ysin, ycos); + return ycos; +} + +template +inline void v_sincos_default_64f(const _TpVec64F &x, _TpVec64F &ysin, _TpVec64F &ycos) { + const _TpVec64F v_cephes_FOPI = v_setall_<_TpVec64F>(1.2732395447351626861510701069801148); // 4 / M_PI + const _TpVec64F v_minus_DP1 = v_setall_<_TpVec64F>(-7.853981554508209228515625E-1); + const _TpVec64F v_minus_DP2 = v_setall_<_TpVec64F>(-7.94662735614792836714E-9); + const _TpVec64F v_minus_DP3 = v_setall_<_TpVec64F>(-3.06161699786838294307E-17); + const _TpVec64F v_sin_C1 = v_setall_<_TpVec64F>(1.58962301576546568060E-10); + const _TpVec64F v_sin_C2 = v_setall_<_TpVec64F>(-2.50507477628578072866E-8); + const _TpVec64F v_sin_C3 = v_setall_<_TpVec64F>(2.75573136213857245213E-6); + const _TpVec64F v_sin_C4 = v_setall_<_TpVec64F>(-1.98412698295895385996E-4); + const _TpVec64F v_sin_C5 = v_setall_<_TpVec64F>(8.33333333332211858878E-3); + const _TpVec64F v_sin_C6 = v_setall_<_TpVec64F>(-1.66666666666666307295E-1); + const _TpVec64F v_cos_C1 = v_setall_<_TpVec64F>(-1.13585365213876817300E-11); + const _TpVec64F v_cos_C2 = v_setall_<_TpVec64F>(2.08757008419747316778E-9); + const _TpVec64F v_cos_C3 = v_setall_<_TpVec64F>(-2.75573141792967388112E-7); + const _TpVec64F v_cos_C4 = v_setall_<_TpVec64F>(2.48015872888517045348E-5); + const _TpVec64F v_cos_C5 = v_setall_<_TpVec64F>(-1.38888888888730564116E-3); + const _TpVec64F v_cos_C6 = v_setall_<_TpVec64F>(4.16666666666665929218E-2); + const _TpVec64F v_nan = v_reinterpret_as_f64(v_setall_<_TpVec64S>((int64)0x7ff8000000000000)); + const _TpVec64F v_neg_zero = v_setall_<_TpVec64F>(-0.0); + + _TpVec64F _vx, _vy, sign_mask_sin, sign_mask_cos; + _TpVec64S emm2; + + sign_mask_sin = v_lt(x, v_setzero_<_TpVec64F>()); + _vx = v_abs(x); + _vy = v_mul(_vx, v_cephes_FOPI); + + emm2 = v_expand_low(v_trunc(_vy)); + emm2 = v_add(emm2, v_setall_<_TpVec64S>((int64)1)); + emm2 = v_and(emm2, v_setall_<_TpVec64S>((int64)~1)); + _vy = v_cvt_f64(emm2); + + _TpVec64F poly_mask = v_reinterpret_as_f64(v_eq(v_and(emm2, v_setall_<_TpVec64S>((int64)2)), v_setall_<_TpVec64S>((int64)0))); + + _vx = v_fma(_vy, v_minus_DP1, _vx); + _vx = v_fma(_vy, v_minus_DP2, _vx); + _vx = v_fma(_vy, v_minus_DP3, _vx); + + sign_mask_sin = v_xor(sign_mask_sin, v_reinterpret_as_f64(v_eq(v_and(emm2, v_setall_<_TpVec64S>((int64)4)), v_setall_<_TpVec64S>((int64)0)))); + sign_mask_cos = v_reinterpret_as_f64(v_eq(v_and(v_sub(emm2, v_setall_<_TpVec64S>((int64)2)), v_setall_<_TpVec64S>((int64)4)), v_setall_<_TpVec64S>((int64)0))); + + _TpVec64F _vxx = v_mul(_vx, _vx); + _TpVec64F y1, y2; + + y1 = v_fma(v_cos_C1, _vxx, v_cos_C2); + y1 = v_fma(y1, _vxx, v_cos_C3); + y1 = v_fma(y1, _vxx, v_cos_C4); + y1 = v_fma(y1, _vxx, v_cos_C5); + y1 = v_fma(y1, _vxx, v_cos_C6); + y1 = v_fma(y1, _vxx, v_setall_<_TpVec64F>(-0.5)); + y1 = v_fma(y1, _vxx, v_setall_<_TpVec64F>(1.0)); + + y2 = v_fma(v_sin_C1, _vxx, v_sin_C2); + y2 = v_fma(y2, _vxx, v_sin_C3); + y2 = v_fma(y2, _vxx, v_sin_C4); + y2 = v_fma(y2, _vxx, v_sin_C5); + y2 = v_fma(y2, _vxx, v_sin_C6); + y2 = v_mul(y2, _vxx); + y2 = v_fma(y2, _vx, _vx); + + ysin = v_select(poly_mask, y2, y1); + ycos = v_select(poly_mask, y1, y2); + ysin = v_select(sign_mask_sin, ysin, v_xor(v_neg_zero, ysin)); + ycos = v_select(sign_mask_cos, v_xor(v_neg_zero, ycos), ycos); + + // sincos(NAN) -> NAN, sincos(±INF) -> NAN + _TpVec64F mask_inf = v_eq(_vx, v_reinterpret_as_f64(v_setall_<_TpVec64S>((int64)0x7ff0000000000000))); + _TpVec64F mask_nan = v_or(mask_inf, v_ne(x, x)); + ysin = v_select(mask_nan, v_nan, ysin); + ycos = v_select(mask_nan, v_nan, ycos); +} + +template +inline _TpVec64F v_sin_default_64f(const _TpVec64F &x) { + _TpVec64F ysin, ycos; + v_sincos_default_64f<_TpVec64F, _TpVec64S>(x, ysin, ycos); + return ysin; +} + +template +inline _TpVec64F v_cos_default_64f(const _TpVec64F &x) { + _TpVec64F ysin, ycos; + v_sincos_default_64f<_TpVec64F, _TpVec64S>(x, ysin, ycos); + return ycos; +} +//! @} + + /* This implementation is derived from the approximation approach of Error Function (Erf) from PyTorch https://github.com/pytorch/pytorch/blob/9c50ecc84b9a6e699a7f058891b889aafbf976c7/aten/src/ATen/cpu/vec/vec512/vec512_float.h#L189-L220 */ diff --git a/modules/core/include/opencv2/core/hal/intrin_msa.hpp b/modules/core/include/opencv2/core/hal/intrin_msa.hpp index 94dc8f55e5..3917faa292 100644 --- a/modules/core/include/opencv2/core/hal/intrin_msa.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_msa.hpp @@ -1864,12 +1864,18 @@ inline void v_pack_store(hfloat* ptr, const v_float32x4& v) inline void v_cleanup() {} #include "intrin_math.hpp" -inline v_float32x4 v_exp(v_float32x4 x) { return v_exp_default_32f(x); } -inline v_float32x4 v_log(v_float32x4 x) { return v_log_default_32f(x); } -inline v_float32x4 v_erf(v_float32x4 x) { return v_erf_default_32f(x); } - -inline v_float64x2 v_exp(v_float64x2 x) { return v_exp_default_64f(x); } -inline v_float64x2 v_log(v_float64x2 x) { return v_log_default_64f(x); } +inline v_float32x4 v_exp(const v_float32x4& x) { return v_exp_default_32f(x); } +inline v_float32x4 v_log(const v_float32x4& x) { return v_log_default_32f(x); } +inline void v_sincos(const v_float32x4& x, v_float32x4& s, v_float32x4& c) { v_sincos_default_32f(x, s, c); } +inline v_float32x4 v_sin(const v_float32x4& x) { return v_sin_default_32f(x); } +inline v_float32x4 v_cos(const v_float32x4& x) { return v_cos_default_32f(x); } +inline v_float32x4 v_erf(const v_float32x4& x) { return v_erf_default_32f(x); } + +inline v_float64x2 v_exp(const v_float64x2& x) { return v_exp_default_64f(x); } +inline v_float64x2 v_log(const v_float64x2& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64x2& x, v_float64x2& s, v_float64x2& c) { v_sincos_default_64f(x, s, c); } +inline v_float64x2 v_sin(const v_float64x2& x) { return v_sin_default_64f(x); } +inline v_float64x2 v_cos(const v_float64x2& x) { return v_cos_default_64f(x); } CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp index da8dd4acfa..64fb7d73bc 100644 --- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -2650,15 +2650,24 @@ inline void v_cleanup() {} #include "intrin_math.hpp" #if defined(CV_SIMD_FP16) && CV_SIMD_FP16 -inline v_float16x8 v_exp(v_float16x8 x) { return v_exp_default_16f(x); } -inline v_float16x8 v_log(v_float16x8 x) { return v_log_default_16f(x); } +inline v_float16x8 v_exp(const v_float16x8& x) { return v_exp_default_16f(x); } +inline v_float16x8 v_log(const v_float16x8& x) { return v_log_default_16f(x); } +inline void v_sincos(const v_float16x8& x, v_float16x8& s, v_float16x8& c) { v_sincos_default_16f(x, s, c); } +inline v_float16x8 v_sin(const v_float16x8& x) { return v_sin_default_16f(x); } +inline v_float16x8 v_cos(const v_float16x8& x) { return v_cos_default_16f(x); } #endif -inline v_float32x4 v_exp(v_float32x4 x) { return v_exp_default_32f(x); } -inline v_float32x4 v_log(v_float32x4 x) { return v_log_default_32f(x); } -inline v_float32x4 v_erf(v_float32x4 x) { return v_erf_default_32f(x); } +inline v_float32x4 v_exp(const v_float32x4& x) { return v_exp_default_32f(x); } +inline v_float32x4 v_log(const v_float32x4& x) { return v_log_default_32f(x); } +inline void v_sincos(const v_float32x4& x, v_float32x4& s, v_float32x4& c) { v_sincos_default_32f(x, s, c); } +inline v_float32x4 v_sin(const v_float32x4& x) { return v_sin_default_32f(x); } +inline v_float32x4 v_cos(const v_float32x4& x) { return v_cos_default_32f(x); } +inline v_float32x4 v_erf(const v_float32x4& x) { return v_erf_default_32f(x); } #if CV_SIMD128_64F -inline v_float64x2 v_exp(v_float64x2 x) { return v_exp_default_64f(x); } -inline v_float64x2 v_log(v_float64x2 x) { return v_log_default_64f(x); } +inline v_float64x2 v_exp(const v_float64x2& x) { return v_exp_default_64f(x); } +inline v_float64x2 v_log(const v_float64x2& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64x2& x, v_float64x2& s, v_float64x2& c) { v_sincos_default_64f(x, s, c); } +inline v_float64x2 v_sin(const v_float64x2& x) { return v_sin_default_64f(x); } +inline v_float64x2 v_cos(const v_float64x2& x) { return v_cos_default_64f(x); } #endif CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp index 88cc13e4c6..146335dc01 100644 --- a/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp @@ -2867,12 +2867,18 @@ inline void v_pack_store(hfloat* ptr, const v_float32x4& v) inline void v_cleanup() {} #include "intrin_math.hpp" -inline v_float32x4 v_exp(v_float32x4 x) { return v_exp_default_32f(x); } -inline v_float32x4 v_log(v_float32x4 x) { return v_log_default_32f(x); } -inline v_float32x4 v_erf(v_float32x4 x) { return v_erf_default_32f(x); } - -inline v_float64x2 v_exp(v_float64x2 x) { return v_exp_default_64f(x); } -inline v_float64x2 v_log(v_float64x2 x) { return v_log_default_64f(x); } +inline v_float32x4 v_exp(const v_float32x4& x) { return v_exp_default_32f(x); } +inline v_float32x4 v_log(const v_float32x4& x) { return v_log_default_32f(x); } +inline void v_sincos(const v_float32x4& x, v_float32x4& s, v_float32x4& c) { v_sincos_default_32f(x, s, c); } +inline v_float32x4 v_sin(const v_float32x4& x) { return v_sin_default_32f(x); } +inline v_float32x4 v_cos(const v_float32x4& x) { return v_cos_default_32f(x); } +inline v_float32x4 v_erf(const v_float32x4& x) { return v_erf_default_32f(x); } + +inline v_float64x2 v_exp(const v_float64x2& x) { return v_exp_default_64f(x); } +inline v_float64x2 v_log(const v_float64x2& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64x2& x, v_float64x2& s, v_float64x2& c) { v_sincos_default_64f(x, s, c); } +inline v_float64x2 v_sin(const v_float64x2& x) { return v_sin_default_64f(x); } +inline v_float64x2 v_cos(const v_float64x2& x) { return v_cos_default_64f(x); } CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp index de80e2fccd..2827449ac3 100644 --- a/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp @@ -2181,12 +2181,18 @@ inline v_float32 v_matmuladd(const v_float32& v, const v_float32& m0, inline void v_cleanup() {} #include "intrin_math.hpp" -inline v_float32 v_exp(v_float32 x) { return v_exp_default_32f(x); } -inline v_float32 v_log(v_float32 x) { return v_log_default_32f(x); } -inline v_float32 v_erf(v_float32 x) { return v_erf_default_32f(x); } - -inline v_float64 v_exp(v_float64 x) { return v_exp_default_64f(x); } -inline v_float64 v_log(v_float64 x) { return v_log_default_64f(x); } +inline v_float32 v_exp(const v_float32& x) { return v_exp_default_32f(x); } +inline v_float32 v_log(const v_float32& x) { return v_log_default_32f(x); } +inline void v_sincos(const v_float32& x, v_float32& s, v_float32& c) { v_sincos_default_32f(x, s, c); } +inline v_float32 v_sin(const v_float32& x) { return v_sin_default_32f(x); } +inline v_float32 v_cos(const v_float32& x) { return v_cos_default_32f(x); } +inline v_float32 v_erf(const v_float32& x) { return v_erf_default_32f(x); } + +inline v_float64 v_exp(const v_float64& x) { return v_exp_default_64f(x); } +inline v_float64 v_log(const v_float64& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64& x, v_float64& s, v_float64& c) { v_sincos_default_64f(x, s, c); } +inline v_float64 v_sin(const v_float64& x) { return v_sin_default_64f(x); } +inline v_float64 v_cos(const v_float64& x) { return v_cos_default_64f(x); } CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END diff --git a/modules/core/include/opencv2/core/hal/intrin_sse.hpp b/modules/core/include/opencv2/core/hal/intrin_sse.hpp index 6f6cbbf9fd..369cd2fbc6 100644 --- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp @@ -3460,12 +3460,19 @@ inline void v_pack_store(hfloat* ptr, const v_float32x4& v) inline void v_cleanup() {} #include "intrin_math.hpp" -inline v_float32x4 v_exp(v_float32x4 x) { return v_exp_default_32f(x); } -inline v_float32x4 v_log(v_float32x4 x) { return v_log_default_32f(x); } -inline v_float32x4 v_erf(v_float32x4 x) { return v_erf_default_32f(x); } +inline v_float32x4 v_exp(const v_float32x4& x) { return v_exp_default_32f(x); } +inline v_float32x4 v_log(const v_float32x4& x) { return v_log_default_32f(x); } +inline void v_sincos(const v_float32x4& x, v_float32x4& s, v_float32x4& c) { v_sincos_default_32f(x, s, c); } +inline v_float32x4 v_sin(const v_float32x4& x) { return v_sin_default_32f(x); } +inline v_float32x4 v_cos(const v_float32x4& x) { return v_cos_default_32f(x); } +inline v_float32x4 v_erf(const v_float32x4& x) { return v_erf_default_32f(x); } + +inline v_float64x2 v_exp(const v_float64x2& x) { return v_exp_default_64f(x); } +inline v_float64x2 v_log(const v_float64x2& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64x2& x, v_float64x2& s, v_float64x2& c) { v_sincos_default_64f(x, s, c); } +inline v_float64x2 v_sin(const v_float64x2& x) { return v_sin_default_64f(x); } +inline v_float64x2 v_cos(const v_float64x2& x) { return v_cos_default_64f(x); } -inline v_float64x2 v_exp(v_float64x2 x) { return v_exp_default_64f(x); } -inline v_float64x2 v_log(v_float64x2 x) { return v_log_default_64f(x); } CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END diff --git a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp index 99684ba8c3..2157e1e870 100644 --- a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp @@ -1597,12 +1597,18 @@ inline Tvec v_broadcast_element(const Tvec& v) { return Tvec(vec_splat(v.val, i)); } #include "intrin_math.hpp" -inline v_float32x4 v_exp(v_float32x4 x) { return v_exp_default_32f(x); } -inline v_float32x4 v_log(v_float32x4 x) { return v_log_default_32f(x); } -inline v_float32x4 v_erf(v_float32x4 x) { return v_erf_default_32f(x); } - -inline v_float64x2 v_exp(v_float64x2 x) { return v_exp_default_64f(x); } -inline v_float64x2 v_log(v_float64x2 x) { return v_log_default_64f(x); } +inline v_float32x4 v_exp(const v_float32x4& x) { return v_exp_default_32f(x); } +inline v_float32x4 v_log(const v_float32x4& x) { return v_log_default_32f(x); } +inline void v_sincos(const v_float32x4& x, v_float32x4& s, v_float32x4& c) { v_sincos_default_32f(x, s, c); } +inline v_float32x4 v_sin(const v_float32x4& x) { return v_sin_default_32f(x); } +inline v_float32x4 v_cos(const v_float32x4& x) { return v_cos_default_32f(x); } +inline v_float32x4 v_erf(const v_float32x4& x) { return v_erf_default_32f(x); } + +inline v_float64x2 v_exp(const v_float64x2& x) { return v_exp_default_64f(x); } +inline v_float64x2 v_log(const v_float64x2& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64x2& x, v_float64x2& s, v_float64x2& c) { v_sincos_default_64f(x, s, c); } +inline v_float64x2 v_sin(const v_float64x2& x) { return v_sin_default_64f(x); } +inline v_float64x2 v_cos(const v_float64x2& x) { return v_cos_default_64f(x); } CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END diff --git a/modules/core/include/opencv2/core/hal/intrin_wasm.hpp b/modules/core/include/opencv2/core/hal/intrin_wasm.hpp index e5c89c94b6..70198451c0 100644 --- a/modules/core/include/opencv2/core/hal/intrin_wasm.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_wasm.hpp @@ -2779,12 +2779,18 @@ inline void v_pack_store(hfloat* ptr, const v_float32x4& v) inline void v_cleanup() {} #include "intrin_math.hpp" -inline v_float32x4 v_exp(v_float32x4 x) { return v_exp_default_32f(x); } -inline v_float32x4 v_log(v_float32x4 x) { return v_log_default_32f(x); } -inline v_float32x4 v_erf(v_float32x4 x) { return v_erf_default_32f(x); } - -inline v_float64x2 v_exp(v_float64x2 x) { return v_exp_default_64f(x); } -inline v_float64x2 v_log(v_float64x2 x) { return v_log_default_64f(x); } +inline v_float32x4 v_exp(const v_float32x4& x) { return v_exp_default_32f(x); } +inline v_float32x4 v_log(const v_float32x4& x) { return v_log_default_32f(x); } +inline void v_sincos(const v_float32x4& x, v_float32x4& s, v_float32x4& c) { v_sincos_default_32f(x, s, c); } +inline v_float32x4 v_sin(const v_float32x4& x) { return v_sin_default_32f(x); } +inline v_float32x4 v_cos(const v_float32x4& x) { return v_cos_default_32f(x); } +inline v_float32x4 v_erf(const v_float32x4& x) { return v_erf_default_32f(x); } + +inline v_float64x2 v_exp(const v_float64x2& x) { return v_exp_default_64f(x); } +inline v_float64x2 v_log(const v_float64x2& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64x2& x, v_float64x2& s, v_float64x2& c) { v_sincos_default_64f(x, s, c); } +inline v_float64x2 v_sin(const v_float64x2& x) { return v_sin_default_64f(x); } +inline v_float64x2 v_cos(const v_float64x2& x) { return v_cos_default_64f(x); } CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index 9eed2d2da3..4fbe3d9c3d 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -1912,6 +1912,99 @@ template struct TheTest return *this; } + + void __test_sincos(LaneType diff_thr, LaneType flt_min) { + int n = VTraits::vlanes(); + // Test each value for a period, from -PI to PI + const LaneType step = (LaneType) 0.01; + for (LaneType i = 0; i <= (LaneType)M_PI;) { + Data dataPosPI, dataNegPI; + for (int j = 0; j < n; ++j) { + dataPosPI[j] = i; + dataNegPI[j] = -i; + i += step; + } + R posPI = dataPosPI, negPI = dataNegPI, sinPos, cosPos, sinNeg, cosNeg; + v_sincos(posPI, sinPos, cosPos); + v_sincos(negPI, sinNeg, cosNeg); + Data resSinPos = sinPos, resCosPos = cosPos, resSinNeg = sinNeg, resCosNeg = cosNeg; + for (int j = 0; j < n; ++j) { + LaneType std_sin_pos = (LaneType) std::sin(dataPosPI[j]); + LaneType std_cos_pos = (LaneType) std::cos(dataPosPI[j]); + LaneType std_sin_neg = (LaneType) std::sin(dataNegPI[j]); + LaneType std_cos_neg = (LaneType) std::cos(dataNegPI[j]); + SCOPED_TRACE(cv::format("Period test value: %lf and %lf", (double) dataPosPI[j], (double) dataNegPI[j])); + EXPECT_LT(std::abs(resSinPos[j] - std_sin_pos), diff_thr * (std::abs(std_sin_pos) + flt_min * 100)); + EXPECT_LT(std::abs(resCosPos[j] - std_cos_pos), diff_thr * (std::abs(std_cos_pos) + flt_min * 100)); + EXPECT_LT(std::abs(resSinNeg[j] - std_sin_neg), diff_thr * (std::abs(std_sin_neg) + flt_min * 100)); + EXPECT_LT(std::abs(resCosNeg[j] - std_cos_neg), diff_thr * (std::abs(std_cos_neg) + flt_min * 100)); + } + } + + // Test special values + std::vector specialValues = {(LaneType) 0, (LaneType) M_PI, (LaneType) (M_PI / 2), (LaneType) INFINITY, (LaneType) -INFINITY, (LaneType) NAN}; + const int testRandNum = 10000; + const double specialValueProbability = 0.1; // 10% chance to insert a special value + cv::RNG_MT19937 rng; + + for (int i = 0; i < testRandNum; i++) { + Data dataRand; + for (int j = 0; j < n; ++j) { + if (rng.uniform(0.f, 1.f) <= specialValueProbability) { + // Insert a special value + int specialValueIndex = rng.uniform(0, (int) specialValues.size()); + dataRand[j] = specialValues[specialValueIndex]; + } else { + // Generate uniform random data in [-1000, 1000] + dataRand[j] = (LaneType) rng.uniform(-1000, 1000); + } + } + + // Compare with std::sin and std::cos + R x = dataRand, s, c; + v_sincos(x, s, c); + Data resSin = s, resCos = c; + for (int j = 0; j < n; ++j) { + SCOPED_TRACE(cv::format("Random test value: %lf", (double) dataRand[j])); + LaneType std_sin = (LaneType) std::sin(dataRand[j]); + LaneType std_cos = (LaneType) std::cos(dataRand[j]); + // input NaN, +INF, -INF -> output NaN + if (std::isnan(dataRand[j]) || std::isinf(dataRand[j])) { + EXPECT_TRUE(std::isnan(resSin[j])); + EXPECT_TRUE(std::isnan(resCos[j])); + } else if(dataRand[j] == 0) { + // sin(0) -> 0, cos(0) -> 1 + EXPECT_EQ(resSin[j], 0); + EXPECT_EQ(resCos[j], 1); + } else { + EXPECT_LT(std::abs(resSin[j] - std_sin), diff_thr * (std::abs(std_sin) + flt_min * 100)); + EXPECT_LT(std::abs(resCos[j] - std_cos), diff_thr * (std::abs(std_cos) + flt_min * 100)); + } + } + } + } + + TheTest &test_sincos_fp16() { +#if CV_SIMD_FP16 + hfloat flt16_min; + uint16_t flt16_min_hex = 0x0400; + std::memcpy(&flt16_min, &flt16_min_hex, sizeof(hfloat)); + __test_sincos((hfloat) 1e-3, flt16_min); +#endif + return *this; + } + + TheTest &test_sincos_fp32() { + __test_sincos(1e-6f, FLT_MIN); + return *this; + } + + TheTest &test_sincos_fp64() { +#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F + __test_sincos(1e-11, DBL_MIN); +#endif + return *this; + } }; #define DUMP_ENTRY(type) printf("SIMD%d: %s\n", 8*VTraits::vlanes(), CV__TRACE_FUNCTION); @@ -2227,6 +2320,7 @@ void test_hal_intrin_float32() .test_pack_triplets() .test_exp_fp32() .test_log_fp32() + .test_sincos_fp32() .test_erf_fp32() #if CV_SIMD_WIDTH == 32 .test_extract<4>().test_extract<5>().test_extract<6>().test_extract<7>() @@ -2261,6 +2355,7 @@ void test_hal_intrin_float64() .test_extract_highest() .test_exp_fp64() .test_log_fp64() + .test_sincos_fp64() //.test_broadcast_element<0>().test_broadcast_element<1>() #if CV_SIMD_WIDTH == 32 .test_extract<2>().test_extract<3>() @@ -2283,6 +2378,7 @@ void test_hal_intrin_float16() .test_float_cvt_fp16() .test_exp_fp16() .test_log_fp16() + .test_sincos_fp16() #endif ; #else