Merge pull request #24262 from mshabunin:fix-riscv-5x

1 year ago · c5ff405d94
parent 416bf3253d c3a37d0fcb
commit c5ff405d94
6 changed files with 32 additions and 25 deletions
--- a/modules/3d/src/pointcloud/utils.hpp
+++ b/modules/3d/src/pointcloud/utils.hpp
@@ -10,6 +10,7 @@
 #include <sstream>
 #include <array>
 #include <algorithm>
 #include <cstdint>
 namespace cv {
--- a/modules/3d/src/ptcloud/sampling.cpp
+++ b/modules/3d/src/ptcloud/sampling.cpp
@@ -272,7 +272,7 @@ int farthestPointSampling(OutputArray sampled_point_flags, InputArray input_pts,
        float max_dist_square = 0;
        int next_pt = sampled_cnt;
        int i = sampled_cnt;
-#ifdef CV_SIMD
+#if CV_SIMD
        v_float32 v_last_p_x = vx_setall_f32(last_pt_x);
        v_float32 v_last_p_y = vx_setall_f32(last_pt_y);
        v_float32 v_last_p_z = vx_setall_f32(last_pt_z);
--- a/modules/3d/src/rgbd/tsdf_functions.hpp
+++ b/modules/3d/src/rgbd/tsdf_functions.hpp
@@ -41,11 +41,13 @@ struct RGBTsdfVoxel
 typedef Vec<uchar, sizeof(RGBTsdfVoxel)> VecRGBTsdfVoxel;
 #if CV_SIMD128
 inline v_float32x4 tsdfToFloat_INTR(const v_int32x4& num)
 {
    v_float32x4 num128 = v_setall_f32(-1.f / 128.f);
    return v_cvt_f32(num) * num128;
 }
 #endif
 inline TsdfType floatToTsdf(float num)
 {
--- a/modules/3d/src/usac/estimator.cpp
+++ b/modules/3d/src/usac/estimator.cpp
@@ -638,7 +638,7 @@ public:
            return errors_cache;
        int i = 0;
-#ifdef CV_SIMD
+#if CV_SIMD
        v_float32 v_a = vx_setall_f32(a);
        v_float32 v_b = vx_setall_f32(b);
        v_float32 v_c = vx_setall_f32(c);
@@ -718,7 +718,7 @@ public:
            return errors_cache;
        int i = 0;
-#ifdef CV_SIMD
+#if CV_SIMD
        v_float32 v_center_x = vx_setall_f32(center_x);
        v_float32 v_center_y = vx_setall_f32(center_y);
        v_float32 v_center_z = vx_setall_f32(center_z);
--- a/modules/core/include/opencv2/core/hal/intrin.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin.hpp
@@ -720,7 +720,7 @@ namespace CV__SIMD_NAMESPACE {
    inline v_int32 vx_load_expand_q(const schar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
    //! @}
-    #ifndef OPENCV_HAL_HAVE_LOAD_STORE_BFLOAT16
+    #ifndef OPENCV_HAL_HAVE_PACK_STORE_BFLOAT16
    inline v_float32 vx_load_expand(const bfloat16_t* ptr)
    {
        v_uint32 v = vx_load_expand((const ushort*)ptr);
@@ -730,7 +730,7 @@ namespace CV__SIMD_NAMESPACE {
    inline void v_pack_store(const bfloat16_t* ptr, const v_float32& v)
    {
        v_int32 iv = v_shr<16>(v_reinterpret_as_s32(v));
-        v_pack_store((short*)ptr, iv);
+        cv::v_pack_store((short*)ptr, iv);
    }
    #endif
@@ -967,6 +967,9 @@ namespace CV__SIMD_NAMESPACE {
        OPENCV_HAL_WRAP_CMP(v_float64x4)
        #endif
    #endif
    OPENCV_HAL_WRAP_CMP_OP(v_int64, lt, <) \
    OPENCV_HAL_WRAP_CMP_OP(v_int64, gt, >) \
    //////////// get0 ////////////
    #define OPENCV_HAL_WRAP_GRT0(_Tpvec) \
@@ -1110,7 +1113,7 @@ namespace CV__SIMD_NAMESPACE {
 #define CV_SIMD 0
 #endif
-#if (!defined CV_SIMD_64F) || (!CV_SIMD_64F)
+#if !CV_SIMD_64F && !CV_SIMD_SCALABLE_64F
 typedef struct v_float64 { int dummy; } v_float64;
 #endif
--- a/modules/core/src/convert.hpp
+++ b/modules/core/src/convert.hpp
@@ -7,6 +7,7 @@
 #define SRC_CONVERT_HPP
 #include "opencv2/core/types.hpp"
 #include "opencv2/core/hal/intrin.hpp"
 namespace cv
 {
@@ -32,11 +33,11 @@ static inline void vx_load_as(const unsigned* ptr, v_float32& a)
 {
    v_uint32 delta = vx_setall_u32(0x80000000U);
    v_uint32 ua = vx_load(ptr);
-    v_uint32 mask_a = (ua >= delta) & delta;
+    v_uint32 mask_a = v_and(v_ge(ua, delta), delta);
    v_float32 fmask_a = v_cvt_f32(v_reinterpret_as_s32(mask_a)); // 0.f or (float)(-(1 << 31))
-    a = v_cvt_f32(v_reinterpret_as_s32(ua - mask_a));
+    a = v_cvt_f32(v_reinterpret_as_s32(v_sub(ua, mask_a)));
    // restore the original values
-    a -= fmask_a; // subtract 0 or a large negative number
+    a = v_sub(a, fmask_a); // subtract 0 or a large negative number
 }
 static inline void vx_load_as(const float* ptr, v_float32& a)
@@ -200,8 +201,8 @@ static inline void vx_load_pair_as(const int64_t* ptr, v_uint64& a, v_uint64& b)
 {
    v_int64 z = vx_setzero_s64();
    v_int64 ia = vx_load(ptr), ib = vx_load(ptr + VTraits<v_uint64>::vlanes());
-    ia &= (ia > z);
+    ia = v_and(ia, v_gt(ia, z));
-    ib &= (ib > z);
+    ib = v_and(ib, v_gt(ib, z));
    a = v_reinterpret_as_u64(ia);
    b = v_reinterpret_as_u64(ib);
 }
@@ -212,10 +213,10 @@ static inline void vx_load_pair_as(const int64_t* ptr, v_uint32& a, v_uint32& b)
    v_int64 z = vx_setzero_s64();
    v_int64 ia0 = vx_load(ptr), ia1 = vx_load(ptr + nlanes);
    v_int64 ib0 = vx_load(ptr + nlanes*2), ib1 = vx_load(ptr + nlanes*3);
-    ia0 &= (ia0 > z);
+    ia0 = v_and(ia0, v_gt(ia0, z));
-    ia1 &= (ia1 > z);
+    ia1 = v_and(ia1, v_gt(ia1, z));
-    ib0 &= (ib0 > z);
+    ib0 = v_and(ib0, v_gt(ib0, z));
-    ib1 &= (ib1 > z);
+    ib1 = v_and(ib1, v_gt(ib1, z));
    a = v_pack(v_reinterpret_as_u64(ia0), v_reinterpret_as_u64(ia1));
    b = v_pack(v_reinterpret_as_u64(ib0), v_reinterpret_as_u64(ib1));
 }
@@ -246,7 +247,7 @@ static inline void vx_load_pair_as(const bool* ptr, v_float32& a, v_float32& b)
 {
    v_uint16 z = vx_setzero_u16();
    v_uint16 uab = vx_load_expand((const uchar*)ptr);
-    uab = v_shr<15>(uab > z);
+    uab = v_shr<15>(v_gt(uab, z));
    v_int32 ia, ib;
    v_expand(v_reinterpret_as_s16(uab), ia, ib);
    a = v_cvt_f32(ia);
@@ -257,7 +258,7 @@ static inline void vx_load_as(const bool* ptr, v_float32& a)
 {
    v_uint32 z = vx_setzero_u32();
    v_uint32 ua = vx_load_expand_q((const uchar*)ptr);
-    ua = v_shr<31>(ua > z);
+    ua = v_shr<31>(v_gt(ua, z));
    a = v_cvt_f32(v_reinterpret_as_s32(ua));
 }
@@ -330,14 +331,14 @@ static inline void vx_load_pair_as(const unsigned* ptr, v_float32& a, v_float32&
    v_uint32 delta = vx_setall_u32(0x80000000U);
    v_uint32 ua = vx_load(ptr);
    v_uint32 ub = vx_load(ptr + VTraits<v_uint32>::vlanes());
-    v_uint32 mask_a = (ua >= delta) & delta, mask_b = (ub >= delta) & delta;
+    v_uint32 mask_a = v_and(v_ge(ua, delta), delta), mask_b = v_and(v_ge(ub, delta), delta);
    v_float32 fmask_a = v_cvt_f32(v_reinterpret_as_s32(mask_a)); // 0.f or (float)(-(1 << 31))
    v_float32 fmask_b = v_cvt_f32(v_reinterpret_as_s32(mask_b)); // 0.f or (float)(-(1 << 31))
-    a = v_cvt_f32(v_reinterpret_as_s32(ua - mask_a));
+    a = v_cvt_f32(v_reinterpret_as_s32(v_sub(ua, mask_a)));
-    b = v_cvt_f32(v_reinterpret_as_s32(ub - mask_b));
+    b = v_cvt_f32(v_reinterpret_as_s32(v_sub(ub, mask_b)));
    // restore the original values
-    a -= fmask_a; // subtract 0 or a large negative number
+    a = v_sub(a, fmask_a); // subtract 0 or a large negative number
-    b -= fmask_b; // subtract 0 or a large negative number
+    b = v_sub(b, fmask_b); // subtract 0 or a large negative number
 }
 static inline void v_store_pair_as(uchar* ptr, const v_uint16& a, const v_uint16& b)
@@ -403,8 +404,8 @@ static inline void v_store_pair_as(schar* ptr, const v_float32& a, const v_float
 static inline void v_store_pair_as(bool* ptr, const v_float32& a, const v_float32& b)
 {
    v_float32 z = vx_setzero_f32();
-    v_uint32 ma = v_shr<31>(v_reinterpret_as_u32(a != z));
+    v_uint32 ma = v_shr<31>(v_reinterpret_as_u32(v_ne(a, z)));
-    v_uint32 mb = v_shr<31>(v_reinterpret_as_u32(b != z));
+    v_uint32 mb = v_shr<31>(v_reinterpret_as_u32(v_ne(b, z)));
    v_uint16 mab = v_pack(ma, mb);
    v_pack_store((uchar*)ptr, mab);
 }
@@ -494,7 +495,7 @@ static inline void vx_load_pair_as(const bool* ptr, v_float64& a, v_float64& b)
 {
    v_uint32 z = vx_setzero_u32();
    v_uint32 uab = vx_load_expand_q((const uchar*)ptr);
-    uab = v_shr<31>(uab > z);
+    uab = v_shr<31>(v_gt(uab, z));
    v_float32 fab = v_cvt_f32(v_reinterpret_as_s32(uab));
    a = v_cvt_f64(fab);
    b = v_cvt_f64_high(fab);