Merge pull request #25872 from fengyuentau:core/v_erf

core: add v_erf #25872

This patch adds v_erf, which is needed by https://github.com/opencv/opencv/pull/25147.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
pull/25877/head
Yuantao Feng 5 months ago committed by GitHub
parent 88b28ee2a0
commit d30b9450c1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 10
      modules/core/include/opencv2/core/hal/intrin_cpp.hpp
  2. 45
      modules/core/include/opencv2/core/hal/intrin_math.hpp
  3. 43
      modules/core/test/test_intrin_utils.hpp

@ -263,7 +263,8 @@ Most of these operations return only one value.
### Other math
- Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude, @ref v_exp
- Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude, @ref v_exp,
@ref v_erf
- Absolute values: @ref v_abs, @ref v_absdiff, @ref v_absdiffs
### Conversions
@ -761,6 +762,13 @@ OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp)
OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp)
#define OPENCV_HAL_MATH_HAVE_LOG 1
/**
* @brief Error function.
*
* @note Only FP32 precision is supported for now.
*/
OPENCV_HAL_IMPL_MATH_FUNC(v_erf, std::erf, _Tp)
//! @cond IGNORED
OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp)
#define OPENCV_HAL_MATH_HAVE_SIN 1

@ -418,5 +418,50 @@ namespace CV__SIMD_NAMESPACE {
#define OPENCV_HAL_MATH_HAVE_LOG 1
//! @}
#endif
/* This implementation is derived from the error function (erf) approximation used in PyTorch:
https://github.com/pytorch/pytorch/blob/9c50ecc84b9a6e699a7f058891b889aafbf976c7/aten/src/ATen/cpu/vec/vec512/vec512_float.h#L189-L220
*/
#ifndef OPENCV_HAL_MATH_HAVE_ERF
//! @name Error Function
//! @{
inline v_float32 v_erf(v_float32 v) {
    // Constants of the approximation
    //   erf(|v|) ~= 1 - t * (c5 + c4*t + c3*t^2 + c2*t^3 + c1*t^4) * exp(-v*v),
    //   t = 1 / (1 + p * |v|)
    const v_float32 p = vx_setall_f32(0.3275911f);
    const v_float32 c1 = vx_setall_f32(1.061405429f);
    const v_float32 c2 = vx_setall_f32(-1.453152027f);
    const v_float32 c3 = vx_setall_f32(1.421413741f);
    const v_float32 c4 = vx_setall_f32(-0.284496736f);
    const v_float32 c5 = vx_setall_f32(0.254829592f);
    const v_float32 one = vx_setall_f32(1.0f);
    // -0.f has only the sign bit set, so it serves as a sign-bit mask.
    const v_float32 sign_bit = vx_setall_f32(-0.f);

    // Keep the sign bit of every lane; it is put back onto the result at the end.
    const v_float32 abs_x = v_abs(v);
    const v_float32 sign = v_and(sign_bit, v);

    // t = 1 / (1 + p * |v|)
    const v_float32 t = v_div(one, v_fma(p, abs_x, one));

    // Horner evaluation: (((c1*t + c2)*t + c3)*t + c4)*t + c5
    v_float32 poly = v_fma(c1, t, c2);
    poly = v_fma(poly, t, c3);
    poly = v_fma(poly, t, c4);
    poly = v_fma(poly, t, c5);

    // -exp(-v*v); both negations are done by flipping the sign bit with XOR.
    const v_float32 neg_sq = v_xor(sign_bit, v_mul(v, v));
    const v_float32 gauss = v_exp(neg_sq);
    const v_float32 neg_gauss = v_xor(sign_bit, gauss);

    // 1 + poly * (t * -exp(-v*v))
    const v_float32 scaled = v_mul(t, neg_gauss);
    const v_float32 erf_abs = v_fma(poly, scaled, one);

    // Apply the sign of the input to the result.
    return v_xor(sign, erf_abs);
}
#define OPENCV_HAL_MATH_HAVE_ERF 1
//! @}
#endif // OPENCV_HAL_MATH_HAVE_ERF
}
#endif // OPENCV_HAL_INTRIN_HPP

@ -1864,6 +1864,48 @@ template<typename R> struct TheTest
#endif
return *this;
}
// Compares v_erf against std::erf on random FP32 inputs mixed with +inf, -inf and NaN.
TheTest &test_erf_fp32() {
    constexpr int iterations = 10000;
    constexpr double special_value_probability = 0.1;
    const int lanes = VTraits<R>::vlanes();
    // Non-finite values that are randomly injected into the input lanes.
    const std::vector<LaneType> special_values{INFINITY, -INFINITY, NAN};

    cv::RNG_MT19937 rng;
    for (int iter = 0; iter < iterations; ++iter) {
        Data<R> inputs;
        for (int j = 0; j < lanes; ++j) {
            const bool inject_special = rng.uniform(0.f, 1.f) <= special_value_probability;
            if (!inject_special) {
                // std::exp(float) overflows at about 88.0f, and v_erf calls exp on the
                // (negated) squared input, hence the range [-sqrt(88.0f), sqrt(88.0f)].
                inputs[j] = static_cast<LaneType>(rng.uniform(-9.4f, 9.4f));
                continue;
            }
            const int special_index = rng.uniform(0, static_cast<int>(special_values.size()));
            inputs[j] = special_values[special_index];
        }

        Data<R> outputs = v_erf(R(inputs));

        for (int j = 0; j < lanes; ++j) {
            SCOPED_TRACE(cv::format("Random test value: %f", inputs[j]));

            // NaN in, NaN out.
            if (std::isnan(inputs[j])) {
                EXPECT_TRUE(std::isnan(outputs[j]));
                continue;
            }

            // Infinite inputs must give exactly +1 or -1, matching the input sign.
            if (std::isinf(inputs[j])) {
                if (inputs[j] > 0) {
                    EXPECT_EQ(1, outputs[j]);
                } else {
                    EXPECT_EQ(-1, outputs[j]);
                }
                continue;
            }

            // Finite inputs: relative tolerance of 1e-3 with a tiny absolute floor.
            LaneType ref_output = std::erf(inputs[j]);
            EXPECT_LT(std::abs(outputs[j] - ref_output), 1e-3f * (std::abs(ref_output) + FLT_MIN * 1e4f));
        }
    }

    return *this;
}
};
// Prints "SIMD<width>: <CV__TRACE_FUNCTION>", where <width> is 8 times the number of v_uint8 lanes.
// CV__TRACE_FUNCTION is presumably the name of the enclosing function -- confirm against its definition.
// The `type` argument is not used by the expansion.
#define DUMP_ENTRY(type) printf("SIMD%d: %s\n", 8*VTraits<v_uint8>::vlanes(), CV__TRACE_FUNCTION);
@ -2179,6 +2221,7 @@ void test_hal_intrin_float32()
.test_pack_triplets()
.test_exp_fp32()
.test_log_fp32()
.test_erf_fp32()
#if CV_SIMD_WIDTH == 32
.test_extract<4>().test_extract<5>().test_extract<6>().test_extract<7>()
.test_rotate<4>().test_rotate<5>().test_rotate<6>().test_rotate<7>()

Loading…
Cancel
Save