diff --git a/modules/3d/src/pointcloud/utils.hpp b/modules/3d/src/pointcloud/utils.hpp
index 815ec9094c..584836085f 100644
--- a/modules/3d/src/pointcloud/utils.hpp
+++ b/modules/3d/src/pointcloud/utils.hpp
@@ -10,6 +10,7 @@
 #include <sstream>
 #include <array>
 #include <algorithm>
+#include <cstdint>
 
 
 namespace cv {
diff --git a/modules/3d/src/ptcloud/sampling.cpp b/modules/3d/src/ptcloud/sampling.cpp
index 2ee1fa2167..06ee2201b7 100644
--- a/modules/3d/src/ptcloud/sampling.cpp
+++ b/modules/3d/src/ptcloud/sampling.cpp
@@ -272,7 +272,7 @@ int farthestPointSampling(OutputArray sampled_point_flags, InputArray input_pts,
         float max_dist_square = 0;
         int next_pt = sampled_cnt;
         int i = sampled_cnt;
-#ifdef CV_SIMD
+#if CV_SIMD
         v_float32 v_last_p_x = vx_setall_f32(last_pt_x);
         v_float32 v_last_p_y = vx_setall_f32(last_pt_y);
         v_float32 v_last_p_z = vx_setall_f32(last_pt_z);
diff --git a/modules/3d/src/rgbd/tsdf_functions.hpp b/modules/3d/src/rgbd/tsdf_functions.hpp
index 5cc6b68dd6..4f9e2f7676 100644
--- a/modules/3d/src/rgbd/tsdf_functions.hpp
+++ b/modules/3d/src/rgbd/tsdf_functions.hpp
@@ -41,11 +41,13 @@ struct RGBTsdfVoxel
 
 typedef Vec<uchar, sizeof(RGBTsdfVoxel)> VecRGBTsdfVoxel;
 
+#if CV_SIMD128 // the helper below is declared with 4-lane vector types (v_float32x4, v_int32x4)
 inline v_float32x4 tsdfToFloat_INTR(const v_int32x4& num)
 {
     v_float32x4 num128 = v_setall_f32(-1.f / 128.f);
     return v_cvt_f32(num) * num128;
 }
+#endif // CV_SIMD128
 
 inline TsdfType floatToTsdf(float num)
 {
diff --git a/modules/3d/src/usac/estimator.cpp b/modules/3d/src/usac/estimator.cpp
index af07fafeb8..493f5d4c3b 100644
--- a/modules/3d/src/usac/estimator.cpp
+++ b/modules/3d/src/usac/estimator.cpp
@@ -638,7 +638,7 @@ public:
             return errors_cache;
 
         int i = 0;
-#ifdef CV_SIMD
+#if CV_SIMD
         v_float32 v_a = vx_setall_f32(a);
         v_float32 v_b = vx_setall_f32(b);
         v_float32 v_c = vx_setall_f32(c);
@@ -718,7 +718,7 @@ public:
             return errors_cache;
 
         int i = 0;
-#ifdef CV_SIMD
+#if CV_SIMD
         v_float32 v_center_x = vx_setall_f32(center_x);
         v_float32 v_center_y = vx_setall_f32(center_y);
         v_float32 v_center_z = vx_setall_f32(center_z);
diff --git a/modules/core/include/opencv2/core/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp
index b2d1e23e19..eaad52fc01 100644
--- a/modules/core/include/opencv2/core/hal/intrin.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin.hpp
@@ -720,7 +720,7 @@ namespace CV__SIMD_NAMESPACE {
     inline v_int32 vx_load_expand_q(const schar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
     //! @}
 
-    #ifndef OPENCV_HAL_HAVE_LOAD_STORE_BFLOAT16
+    #ifndef OPENCV_HAL_HAVE_PACK_STORE_BFLOAT16
     inline v_float32 vx_load_expand(const bfloat16_t* ptr)
     {
         v_uint32 v = vx_load_expand((const ushort*)ptr);
@@ -730,7 +730,7 @@ namespace CV__SIMD_NAMESPACE {
     inline void v_pack_store(const bfloat16_t* ptr, const v_float32& v)
     {
         v_int32 iv = v_shr<16>(v_reinterpret_as_s32(v));
-        v_pack_store((short*)ptr, iv);
+        cv::v_pack_store((short*)ptr, iv); // NOTE(review): stores through a pointer declared const (the cast drops it); cv:: presumably picks the short* overload explicitly - confirm
     }
 
     #endif
@@ -967,6 +967,9 @@ namespace CV__SIMD_NAMESPACE {
         OPENCV_HAL_WRAP_CMP(v_float64x4)
         #endif
     #endif
+    OPENCV_HAL_WRAP_CMP_OP(v_int64, lt, <) // stand-alone invocation for v_int64 '<' (no line continuation: not inside a #define)
+    OPENCV_HAL_WRAP_CMP_OP(v_int64, gt, >) // stand-alone invocation for v_int64 '>' (convert.hpp calls v_gt on v_int64)
+
 
     //////////// get0 ////////////
     #define OPENCV_HAL_WRAP_GRT0(_Tpvec) \
@@ -1110,7 +1113,7 @@ namespace CV__SIMD_NAMESPACE {
 #define CV_SIMD 0
 #endif
 
-#if (!defined CV_SIMD_64F) || (!CV_SIMD_64F)
+#if !CV_SIMD_64F && !CV_SIMD_SCALABLE_64F // placeholder v_float64 only when both flags are 0 (an undefined macro counts as 0 in #if)
 typedef struct v_float64 { int dummy; } v_float64;
 #endif
 
diff --git a/modules/core/src/convert.hpp b/modules/core/src/convert.hpp
index 9f8e5643d5..7cc6ad5f0c 100644
--- a/modules/core/src/convert.hpp
+++ b/modules/core/src/convert.hpp
@@ -7,6 +7,7 @@
 #define SRC_CONVERT_HPP
 
 #include "opencv2/core/types.hpp"
+#include "opencv2/core/hal/intrin.hpp"
 
 namespace cv
 {
@@ -32,11 +33,11 @@ static inline void vx_load_as(const unsigned* ptr, v_float32& a)
 {
     v_uint32 delta = vx_setall_u32(0x80000000U);
     v_uint32 ua = vx_load(ptr);
-    v_uint32 mask_a = (ua >= delta) & delta;
+    v_uint32 mask_a = v_and(v_ge(ua, delta), delta); // 0x80000000 in lanes where ua >= 2^31, otherwise 0
     v_float32 fmask_a = v_cvt_f32(v_reinterpret_as_s32(mask_a)); // 0.f or (float)(-(1 << 31))
-    a = v_cvt_f32(v_reinterpret_as_s32(ua - mask_a));
+    a = v_cvt_f32(v_reinterpret_as_s32(v_sub(ua, mask_a))); // top bit removed first, so the signed conversion sees a non-negative value
     // restore the original values
-    a -= fmask_a; // subtract 0 or a large negative number
+    a = v_sub(a, fmask_a); // subtract 0 or a large negative number
 }
 
 static inline void vx_load_as(const float* ptr, v_float32& a)
@@ -200,8 +201,8 @@ static inline void vx_load_pair_as(const int64_t* ptr, v_uint64& a, v_uint64& b)
 {
     v_int64 z = vx_setzero_s64();
     v_int64 ia = vx_load(ptr), ib = vx_load(ptr + VTraits<v_uint64>::vlanes());
-    ia &= (ia > z);
-    ib &= (ib > z);
+    ia = v_and(ia, v_gt(ia, z)); // keep positive lanes, lanes <= 0 become 0 (assumes v_gt yields an all-ones/all-zeros lane mask)
+    ib = v_and(ib, v_gt(ib, z)); // same clamp for the second vector
     a = v_reinterpret_as_u64(ia);
     b = v_reinterpret_as_u64(ib);
 }
@@ -212,10 +213,10 @@ static inline void vx_load_pair_as(const int64_t* ptr, v_uint32& a, v_uint32& b)
     v_int64 z = vx_setzero_s64();
     v_int64 ia0 = vx_load(ptr), ia1 = vx_load(ptr + nlanes);
     v_int64 ib0 = vx_load(ptr + nlanes*2), ib1 = vx_load(ptr + nlanes*3);
-    ia0 &= (ia0 > z);
-    ia1 &= (ia1 > z);
-    ib0 &= (ib0 > z);
-    ib1 &= (ib1 > z);
+    ia0 = v_and(ia0, v_gt(ia0, z));
+    ia1 = v_and(ia1, v_gt(ia1, z));
+    ib0 = v_and(ib0, v_gt(ib0, z));
+    ib1 = v_and(ib1, v_gt(ib1, z));
     a = v_pack(v_reinterpret_as_u64(ia0), v_reinterpret_as_u64(ia1));
     b = v_pack(v_reinterpret_as_u64(ib0), v_reinterpret_as_u64(ib1));
 }
@@ -246,7 +247,7 @@ static inline void vx_load_pair_as(const bool* ptr, v_float32& a, v_float32& b)
 {
     v_uint16 z = vx_setzero_u16();
     v_uint16 uab = vx_load_expand((const uchar*)ptr);
-    uab = v_shr<15>(uab > z);
+    uab = v_shr<15>(v_gt(uab, z));
     v_int32 ia, ib;
     v_expand(v_reinterpret_as_s16(uab), ia, ib);
     a = v_cvt_f32(ia);
@@ -257,7 +258,7 @@ static inline void vx_load_as(const bool* ptr, v_float32& a)
 {
     v_uint32 z = vx_setzero_u32();
     v_uint32 ua = vx_load_expand_q((const uchar*)ptr);
-    ua = v_shr<31>(ua > z);
+    ua = v_shr<31>(v_gt(ua, z)); // reduce the comparison mask to its top bit: 1 for nonzero input bytes, 0 otherwise (assumes an all-ones mask)
     a = v_cvt_f32(v_reinterpret_as_s32(ua));
 }
 
@@ -330,14 +331,14 @@ static inline void vx_load_pair_as(const unsigned* ptr, v_float32& a, v_float32&
     v_uint32 delta = vx_setall_u32(0x80000000U);
     v_uint32 ua = vx_load(ptr);
     v_uint32 ub = vx_load(ptr + VTraits<v_uint32>::vlanes());
-    v_uint32 mask_a = (ua >= delta) & delta, mask_b = (ub >= delta) & delta;
+    v_uint32 mask_a = v_and(v_ge(ua, delta), delta), mask_b = v_and(v_ge(ub, delta), delta);
     v_float32 fmask_a = v_cvt_f32(v_reinterpret_as_s32(mask_a)); // 0.f or (float)(-(1 << 31))
     v_float32 fmask_b = v_cvt_f32(v_reinterpret_as_s32(mask_b)); // 0.f or (float)(-(1 << 31))
-    a = v_cvt_f32(v_reinterpret_as_s32(ua - mask_a));
-    b = v_cvt_f32(v_reinterpret_as_s32(ub - mask_b));
+    a = v_cvt_f32(v_reinterpret_as_s32(v_sub(ua, mask_a)));
+    b = v_cvt_f32(v_reinterpret_as_s32(v_sub(ub, mask_b)));
     // restore the original values
-    a -= fmask_a; // subtract 0 or a large negative number
-    b -= fmask_b; // subtract 0 or a large negative number
+    a = v_sub(a, fmask_a); // subtract 0 or a large negative number
+    b = v_sub(b, fmask_b); // subtract 0 or a large negative number
 }
 
 static inline void v_store_pair_as(uchar* ptr, const v_uint16& a, const v_uint16& b)
@@ -403,8 +404,8 @@ static inline void v_store_pair_as(schar* ptr, const v_float32& a, const v_float
 static inline void v_store_pair_as(bool* ptr, const v_float32& a, const v_float32& b)
 {
     v_float32 z = vx_setzero_f32();
-    v_uint32 ma = v_shr<31>(v_reinterpret_as_u32(a != z));
-    v_uint32 mb = v_shr<31>(v_reinterpret_as_u32(b != z));
+    v_uint32 ma = v_shr<31>(v_reinterpret_as_u32(v_ne(a, z))); // per lane: 1 where a != 0, else 0 (assumes v_ne yields an all-ones mask)
+    v_uint32 mb = v_shr<31>(v_reinterpret_as_u32(v_ne(b, z))); // per lane: 1 where b != 0, else 0
     v_uint16 mab = v_pack(ma, mb);
     v_pack_store((uchar*)ptr, mab);
 }
@@ -494,7 +495,7 @@ static inline void vx_load_pair_as(const bool* ptr, v_float64& a, v_float64& b)
 {
     v_uint32 z = vx_setzero_u32();
     v_uint32 uab = vx_load_expand_q((const uchar*)ptr);
-    uab = v_shr<31>(uab > z);
+    uab = v_shr<31>(v_gt(uab, z));
     v_float32 fab = v_cvt_f32(v_reinterpret_as_s32(uab));
     a = v_cvt_f64(fab);
     b = v_cvt_f64_high(fab);