Merge pull request #25586 from hanliutong:rvv-64f

Fix v_round and enable unit tests for scalable universal intrinsic 64F type. #25586

This may be a legacy issue from the previous PR #24325. I don't quite remember why the float 64 part of the unit test was not enabled at that time.

In any case, this patch enables the unit tests for the scalable 64F type and makes the necessary modifications to the RVV backend so that the tests pass.

This patch was compiled with GCC 14 and LLVM 17 & 18, and tested on QEMU and K230.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is an accuracy test, a performance test and test data in the opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
pull/25619/head
HAN Liutong 8 months ago committed by GitHub
parent c71d495273
commit e52540162f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 7
      modules/core/include/opencv2/core/hal/intrin_rvv_compat_overloaded.hpp
  2. 26
      modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp
  3. 38
      modules/core/test/test_intrin_utils.hpp
  4. 4
      modules/core/test/test_operations.cpp

@ -200,9 +200,14 @@ inline static vuint32mf2_t vmul(const vuint32mf2_t & op1, uint32_t op2, size_t v
return vmul_vx_u32mf2(op1, op2, vl);
}
// Overload-style wrapper that forwards a vint32mf2_t value to
// vreinterpret_v_i32mf2_u32mf2 and returns the resulting vuint32mf2_t.
// The argument is taken by const reference, the same way the vuint16mf2_t
// overload of vreinterpret_u32mf2 takes its argument; existing call sites that
// passed the vector by value keep working unchanged.
inline static vuint32mf2_t vreinterpret_u32mf2(const vint32mf2_t& val)
{
return vreinterpret_v_i32mf2_u32mf2(val);
}
// Overload-style wrapper for vuint16mf2_t input: hands the value to
// vreinterpret_v_u16mf2_u32mf2 and returns the vuint32mf2_t it produces.
inline static vuint32mf2_t vreinterpret_u32mf2(const vuint16mf2_t& val)
{
const vuint32mf2_t reinterpreted = vreinterpret_v_u16mf2_u32mf2(val);
return reinterpreted;
}
#endif //OPENCV_HAL_INTRIN_RVV_COMPAT_OVERLOAD_HPP

@ -1528,6 +1528,26 @@ OPENCV_HAL_IMPL_RVV_ZIP(v_uint32, vuint32m2_t, u32, 32, 64, OPENCV_HAL_NOP, OPEN
OPENCV_HAL_IMPL_RVV_ZIP(v_int32, vint32m2_t, i32, 32, 64, vreinterpret_u32m2, vreinterpret_u32m1)
OPENCV_HAL_IMPL_RVV_ZIP(v_float32, vfloat32m2_t, f32, 32, 64, vreinterpret_u32m2, vreinterpret_u32m1)
#if CV_SIMD_SCALABLE_64F
inline void v_zip(const v_float64& a0, const v_float64& a1, v_float64& b0, v_float64& b1) { \
vuint16mf4_t idx0 = vid_v_u16mf4(VTraits<v_float64>::vlanes());
vuint16mf4_t idx1 = vadd(idx0, VTraits<v_float64>::vlanes(), VTraits<v_float64>::vlanes());
vuint16mf2_t idx = vreinterpret_u16mf2(( \
vor(vzext_vf2(idx0, VTraits<v_float64>::vlanes()), \
vreinterpret_u32mf2(vslide1up(vreinterpret_u16mf2(vzext_vf2(idx1, VTraits<v_float64>::vlanes())), 0, VTraits<v_uint32>::vlanes())), \
VTraits<v_uint32>::vlanes())));
#if 0
vfloat64m2_t temp = __riscv_vcreate_v_f64m1_f64m2(a0, a1);
#else // TODO: clean up when RVV Intrinsic is frozen.
vfloat64m2_t temp = vlmul_ext_f64m2(a0);
temp = vset(temp, 1, a1);
#endif
temp = vrgatherei16(temp, idx, VTraits<v_float64>::vlanes()*2);
b0 = vget_f64m1(temp, 0); \
b1 = vget_f64m1(temp, 1); \
}
#endif
#define OPENCV_HAL_IMPL_RVV_UNPACKS(_Tpvec, width) \
inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \
{ \
@ -1859,12 +1879,14 @@ inline v_int32 v_trunc(const v_float32& a)
#if CV_SIMD_SCALABLE_64F
// Converts the float64 lanes of `a` to 32-bit integers: `a` is extended to an m2
// register group and narrowed with vfncvt_x over VTraits<v_float32>::vlanes() elements.
// The input is converted as-is. An earlier version added 1e-6 to every lane before
// converting, which shifts values that sit exactly on a rounding boundary; that
// biased statement used to come first and made the unbiased conversion unreachable.
// See https://github.com/opencv/opencv/issues/24746
// NOTE(review): the rounding applied by vfncvt_x presumably follows the current
// floating-point rounding mode -- confirm it matches cvRound on the target.
inline v_int32 v_round(const v_float64& a)
{
return vfncvt_x(vlmul_ext_f64m2(a), VTraits<v_float32>::vlanes());
}
// Two-input variant: `a` is extended to an m2 register group, `b` is stored into
// part 1 of that group with vset, and the whole group is narrowed to 32-bit integers
// by one vfncvt_x over VTraits<v_float32>::vlanes() elements.
// Neither input is biased before conversion. The former implementation added 1e-6
// to `a` (and only `a`) first; that statement used to execute before the corrected
// one and made it unreachable.
// Fix https://github.com/opencv/opencv/issues/24746
inline v_int32 v_round(const v_float64& a, const v_float64& b)
{
return vfncvt_x(vset(vlmul_ext_f64m2(a), 1, b), VTraits<v_float32>::vlanes());
}
inline v_int32 v_floor(const v_float64& a)

@ -281,7 +281,7 @@ template<typename R> struct TheTest
v_uint64 vu64 = v_reinterpret_as_u64(r1); out.a.clear(); v_store((uint64*)out.a.d, vu64); EXPECT_EQ(data.a, out.a);
v_int64 vs64 = v_reinterpret_as_s64(r1); out.a.clear(); v_store((int64*)out.a.d, vs64); EXPECT_EQ(data.a, out.a);
v_float32 vf32 = v_reinterpret_as_f32(r1); out.a.clear(); v_store((float*)out.a.d, vf32); EXPECT_EQ(data.a, out.a);
#if CV_SIMD_64F
#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
v_float64 vf64 = v_reinterpret_as_f64(r1); out.a.clear(); v_store((double*)out.a.d, vf64); EXPECT_EQ(data.a, out.a);
#endif
@ -747,7 +747,7 @@ template<typename R> struct TheTest
TheTest & test_dotprod_expand_f64()
{
#if CV_SIMD_64F
#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
Data<R> dataA, dataB;
dataA += std::numeric_limits<LaneType>::max() - VTraits<R>::vlanes();
dataB += std::numeric_limits<LaneType>::min();
@ -1385,6 +1385,33 @@ template<typename R> struct TheTest
return *this;
}
#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
// Checks the two-input v_round against the scalar cvRound, lane by lane.
// Three data sets are used: default data scaled by 1.1 (data1), default data
// scaled by 0.5 (data1_border) and default data shifted by 10 (data2). Each
// register is passed as both arguments of v_round, and only the first
// VTraits<R>::vlanes() results are compared.
// See https://github.com/opencv/opencv/issues/24213
// https://github.com/opencv/opencv/issues/24163
// https://github.com/opencv/opencv/pull/24271
TheTest & test_round_pair_f64()
{
typedef typename V_RegTraits<R>::round_reg RoundReg;

Data<R> data1;
data1 *= 1.1;
Data<R> data1_border;
data1_border *= 0.5;
Data<R> data2;
data2 += 10;

R regScaled = data1;
R regBorder = data1_border;
R regShifted = data2;

Data<RoundReg> resA = v_round(regScaled, regScaled);
Data<RoundReg> resB = v_round(regBorder, regBorder);
Data<RoundReg> resC = v_round(regShifted, regShifted);

const auto laneCount = VTraits<R>::vlanes();
for (int i = 0; i < laneCount; ++i)
{
EXPECT_EQ(cvRound(data1[i]), resA[i]);
EXPECT_EQ(cvRound(data1_border[i]), resB[i]);
EXPECT_EQ(cvRound(data2[i]), resC[i]);
}
return *this;
}
#endif
TheTest & test_float_cvt32()
{
@ -1405,7 +1432,7 @@ template<typename R> struct TheTest
TheTest & test_float_cvt64()
{
#if CV_SIMD_64F
#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
typedef v_float64 Rt;
Data<R> dataA;
dataA *= 1.1;
@ -1431,7 +1458,7 @@ template<typename R> struct TheTest
TheTest & test_cvt64_double()
{
#if CV_SIMD_64F
#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
Data<R> dataA(std::numeric_limits<LaneType>::max()),
dataB(std::numeric_limits<LaneType>::min());
dataB += VTraits<R>::vlanes();
@ -1994,7 +2021,7 @@ void test_hal_intrin_float32()
void test_hal_intrin_float64()
{
DUMP_ENTRY(v_float64);
#if CV_SIMD_64F
#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
TheTest<v_float64>()
.test_loadstore()
.test_addsub()
@ -2008,6 +2035,7 @@ void test_hal_intrin_float64()
.test_mask()
.test_unpack()
.test_float_math()
.test_round_pair_f64()
.test_float_cvt32()
.test_reverse()
.test_extract<0>().test_extract<1>()

@ -1574,11 +1574,7 @@ TEST(Core_Arithm, scalar_handling_19599) // https://github.com/opencv/opencv/is
typedef tuple<perf::MatDepth,int,int,int> Arith_Regression24163Param;
typedef testing::TestWithParam<Arith_Regression24163Param> Core_Arith_Regression24163;
#if defined __riscv
TEST_P(Core_Arith_Regression24163, DISABLED_test_for_ties_to_even)
#else
TEST_P(Core_Arith_Regression24163, test_for_ties_to_even)
#endif
{
const int matDepth = get<0>(GetParam());
const int matHeight= get<1>(GetParam());

Loading…
Cancel
Save