diff --git a/modules/3d/src/pointcloud/utils.hpp b/modules/3d/src/pointcloud/utils.hpp index 815ec9094c..584836085f 100644 --- a/modules/3d/src/pointcloud/utils.hpp +++ b/modules/3d/src/pointcloud/utils.hpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace cv { diff --git a/modules/3d/src/ptcloud/sampling.cpp b/modules/3d/src/ptcloud/sampling.cpp index 2ee1fa2167..06ee2201b7 100644 --- a/modules/3d/src/ptcloud/sampling.cpp +++ b/modules/3d/src/ptcloud/sampling.cpp @@ -272,7 +272,7 @@ int farthestPointSampling(OutputArray sampled_point_flags, InputArray input_pts, float max_dist_square = 0; int next_pt = sampled_cnt; int i = sampled_cnt; -#ifdef CV_SIMD +#if CV_SIMD v_float32 v_last_p_x = vx_setall_f32(last_pt_x); v_float32 v_last_p_y = vx_setall_f32(last_pt_y); v_float32 v_last_p_z = vx_setall_f32(last_pt_z); diff --git a/modules/3d/src/rgbd/tsdf_functions.hpp b/modules/3d/src/rgbd/tsdf_functions.hpp index 5cc6b68dd6..4f9e2f7676 100644 --- a/modules/3d/src/rgbd/tsdf_functions.hpp +++ b/modules/3d/src/rgbd/tsdf_functions.hpp @@ -41,11 +41,13 @@ struct RGBTsdfVoxel typedef Vec VecRGBTsdfVoxel; +#if CV_SIMD128 inline v_float32x4 tsdfToFloat_INTR(const v_int32x4& num) { v_float32x4 num128 = v_setall_f32(-1.f / 128.f); return v_cvt_f32(num) * num128; } +#endif inline TsdfType floatToTsdf(float num) { diff --git a/modules/3d/src/usac/estimator.cpp b/modules/3d/src/usac/estimator.cpp index af07fafeb8..493f5d4c3b 100644 --- a/modules/3d/src/usac/estimator.cpp +++ b/modules/3d/src/usac/estimator.cpp @@ -638,7 +638,7 @@ public: return errors_cache; int i = 0; -#ifdef CV_SIMD +#if CV_SIMD v_float32 v_a = vx_setall_f32(a); v_float32 v_b = vx_setall_f32(b); v_float32 v_c = vx_setall_f32(c); @@ -718,7 +718,7 @@ public: return errors_cache; int i = 0; -#ifdef CV_SIMD +#if CV_SIMD v_float32 v_center_x = vx_setall_f32(center_x); v_float32 v_center_y = vx_setall_f32(center_y); v_float32 v_center_z = vx_setall_f32(center_z); diff --git a/modules/core/include/opencv2/core/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp index b2d1e23e19..eaad52fc01 100644 --- a/modules/core/include/opencv2/core/hal/intrin.hpp +++ b/modules/core/include/opencv2/core/hal/intrin.hpp @@ -720,7 +720,7 @@ namespace CV__SIMD_NAMESPACE { inline v_int32 vx_load_expand_q(const schar * ptr) { return VXPREFIX(_load_expand_q)(ptr); } //! @} - #ifndef OPENCV_HAL_HAVE_LOAD_STORE_BFLOAT16 + #ifndef OPENCV_HAL_HAVE_PACK_STORE_BFLOAT16 inline v_float32 vx_load_expand(const bfloat16_t* ptr) { v_uint32 v = vx_load_expand((const ushort*)ptr); @@ -730,7 +730,7 @@ namespace CV__SIMD_NAMESPACE { inline void v_pack_store(const bfloat16_t* ptr, const v_float32& v) { v_int32 iv = v_shr<16>(v_reinterpret_as_s32(v)); - v_pack_store((short*)ptr, iv); + cv::v_pack_store((short*)ptr, iv); } #endif @@ -967,6 +967,9 @@ namespace CV__SIMD_NAMESPACE { OPENCV_HAL_WRAP_CMP(v_float64x4) #endif #endif + OPENCV_HAL_WRAP_CMP_OP(v_int64, lt, <) \ + OPENCV_HAL_WRAP_CMP_OP(v_int64, gt, >) \ + //////////// get0 //////////// #define OPENCV_HAL_WRAP_GRT0(_Tpvec) \ @@ -1110,7 +1113,7 @@ namespace CV__SIMD_NAMESPACE { #define CV_SIMD 0 #endif -#if (!defined CV_SIMD_64F) || (!CV_SIMD_64F) +#if !CV_SIMD_64F && !CV_SIMD_SCALABLE_64F typedef struct v_float64 { int dummy; } v_float64; #endif diff --git a/modules/core/src/convert.hpp b/modules/core/src/convert.hpp index 9f8e5643d5..7cc6ad5f0c 100644 --- a/modules/core/src/convert.hpp +++ b/modules/core/src/convert.hpp @@ -7,6 +7,7 @@ #define SRC_CONVERT_HPP #include "opencv2/core/types.hpp" +#include "opencv2/core/hal/intrin.hpp" namespace cv { @@ -32,11 +33,11 @@ static inline void vx_load_as(const unsigned* ptr, v_float32& a) { v_uint32 delta = vx_setall_u32(0x80000000U); v_uint32 ua = vx_load(ptr); - v_uint32 mask_a = (ua >= delta) & delta; + v_uint32 mask_a = v_and(v_ge(ua, delta), delta); v_float32 fmask_a = v_cvt_f32(v_reinterpret_as_s32(mask_a)); // 0.f or (float)(-(1 << 31)) - a = v_cvt_f32(v_reinterpret_as_s32(ua - mask_a)); + a = v_cvt_f32(v_reinterpret_as_s32(v_sub(ua, mask_a))); // restore the original values - a -= fmask_a; // subtract 0 or a large negative number + a = v_sub(a, fmask_a); // subtract 0 or a large negative number } static inline void vx_load_as(const float* ptr, v_float32& a) @@ -200,8 +201,8 @@ static inline void vx_load_pair_as(const int64_t* ptr, v_uint64& a, v_uint64& b) { v_int64 z = vx_setzero_s64(); v_int64 ia = vx_load(ptr), ib = vx_load(ptr + VTraits::vlanes()); - ia &= (ia > z); - ib &= (ib > z); + ia = v_and(ia, v_gt(ia, z)); + ib = v_and(ib, v_gt(ib, z)); a = v_reinterpret_as_u64(ia); b = v_reinterpret_as_u64(ib); } @@ -212,10 +213,10 @@ static inline void vx_load_pair_as(const int64_t* ptr, v_uint32& a, v_uint32& b) v_int64 z = vx_setzero_s64(); v_int64 ia0 = vx_load(ptr), ia1 = vx_load(ptr + nlanes); v_int64 ib0 = vx_load(ptr + nlanes*2), ib1 = vx_load(ptr + nlanes*3); - ia0 &= (ia0 > z); - ia1 &= (ia1 > z); - ib0 &= (ib0 > z); - ib1 &= (ib1 > z); + ia0 = v_and(ia0, v_gt(ia0, z)); + ia1 = v_and(ia1, v_gt(ia1, z)); + ib0 = v_and(ib0, v_gt(ib0, z)); + ib1 = v_and(ib1, v_gt(ib1, z)); a = v_pack(v_reinterpret_as_u64(ia0), v_reinterpret_as_u64(ia1)); b = v_pack(v_reinterpret_as_u64(ib0), v_reinterpret_as_u64(ib1)); } @@ -246,7 +247,7 @@ static inline void vx_load_pair_as(const bool* ptr, v_float32& a, v_float32& b) { v_uint16 z = vx_setzero_u16(); v_uint16 uab = vx_load_expand((const uchar*)ptr); - uab = v_shr<15>(uab > z); + uab = v_shr<15>(v_gt(uab, z)); v_int32 ia, ib; v_expand(v_reinterpret_as_s16(uab), ia, ib); a = v_cvt_f32(ia); @@ -257,7 +258,7 @@ static inline void vx_load_as(const bool* ptr, v_float32& a) { v_uint32 z = vx_setzero_u32(); v_uint32 ua = vx_load_expand_q((const uchar*)ptr); - ua = v_shr<31>(ua > z); + ua = v_shr<31>(v_gt(ua, z)); a = v_cvt_f32(v_reinterpret_as_s32(ua)); } @@ -330,14 +331,14 @@ static inline void vx_load_pair_as(const unsigned* ptr, v_float32& a, v_float32& v_uint32 delta = vx_setall_u32(0x80000000U); v_uint32 ua = vx_load(ptr); v_uint32 ub = vx_load(ptr + VTraits::vlanes()); - v_uint32 mask_a = (ua >= delta) & delta, mask_b = (ub >= delta) & delta; + v_uint32 mask_a = v_and(v_ge(ua, delta), delta), mask_b = v_and(v_ge(ub, delta), delta); v_float32 fmask_a = v_cvt_f32(v_reinterpret_as_s32(mask_a)); // 0.f or (float)(-(1 << 31)) v_float32 fmask_b = v_cvt_f32(v_reinterpret_as_s32(mask_b)); // 0.f or (float)(-(1 << 31)) - a = v_cvt_f32(v_reinterpret_as_s32(ua - mask_a)); - b = v_cvt_f32(v_reinterpret_as_s32(ub - mask_b)); + a = v_cvt_f32(v_reinterpret_as_s32(v_sub(ua, mask_a))); + b = v_cvt_f32(v_reinterpret_as_s32(v_sub(ub, mask_b))); // restore the original values - a -= fmask_a; // subtract 0 or a large negative number - b -= fmask_b; // subtract 0 or a large negative number + a = v_sub(a, fmask_a); // subtract 0 or a large negative number + b = v_sub(b, fmask_b); // subtract 0 or a large negative number } static inline void v_store_pair_as(uchar* ptr, const v_uint16& a, const v_uint16& b) @@ -403,8 +404,8 @@ static inline void v_store_pair_as(schar* ptr, const v_float32& a, const v_float static inline void v_store_pair_as(bool* ptr, const v_float32& a, const v_float32& b) { v_float32 z = vx_setzero_f32(); - v_uint32 ma = v_shr<31>(v_reinterpret_as_u32(a != z)); - v_uint32 mb = v_shr<31>(v_reinterpret_as_u32(b != z)); + v_uint32 ma = v_shr<31>(v_reinterpret_as_u32(v_ne(a, z))); + v_uint32 mb = v_shr<31>(v_reinterpret_as_u32(v_ne(b, z))); v_uint16 mab = v_pack(ma, mb); v_pack_store((uchar*)ptr, mab); } @@ -494,7 +495,7 @@ static inline void vx_load_pair_as(const bool* ptr, v_float64& a, v_float64& b) { v_uint32 z = vx_setzero_u32(); v_uint32 uab = vx_load_expand_q((const uchar*)ptr); - uab = v_shr<31>(uab > z); + uab = v_shr<31>(v_gt(uab, z)); v_float32 fab = v_cvt_f32(v_reinterpret_as_s32(uab)); a = v_cvt_f64(fab); b = v_cvt_f64_high(fab);