Merge pull request #24262 from mshabunin:fix-riscv-5x

pull/24338/head
Alexander Alekhin 1 year ago
commit c5ff405d94
  1. 1
      modules/3d/src/pointcloud/utils.hpp
  2. 2
      modules/3d/src/ptcloud/sampling.cpp
  3. 2
      modules/3d/src/rgbd/tsdf_functions.hpp
  4. 4
      modules/3d/src/usac/estimator.cpp
  5. 9
      modules/core/include/opencv2/core/hal/intrin.hpp
  6. 39
      modules/core/src/convert.hpp

@ -10,6 +10,7 @@
#include <sstream> #include <sstream>
#include <array> #include <array>
#include <algorithm> #include <algorithm>
#include <cstdint>
namespace cv { namespace cv {

@ -272,7 +272,7 @@ int farthestPointSampling(OutputArray sampled_point_flags, InputArray input_pts,
float max_dist_square = 0; float max_dist_square = 0;
int next_pt = sampled_cnt; int next_pt = sampled_cnt;
int i = sampled_cnt; int i = sampled_cnt;
#ifdef CV_SIMD #if CV_SIMD
v_float32 v_last_p_x = vx_setall_f32(last_pt_x); v_float32 v_last_p_x = vx_setall_f32(last_pt_x);
v_float32 v_last_p_y = vx_setall_f32(last_pt_y); v_float32 v_last_p_y = vx_setall_f32(last_pt_y);
v_float32 v_last_p_z = vx_setall_f32(last_pt_z); v_float32 v_last_p_z = vx_setall_f32(last_pt_z);

@ -41,11 +41,13 @@ struct RGBTsdfVoxel
typedef Vec<uchar, sizeof(RGBTsdfVoxel)> VecRGBTsdfVoxel; typedef Vec<uchar, sizeof(RGBTsdfVoxel)> VecRGBTsdfVoxel;
#if CV_SIMD128
inline v_float32x4 tsdfToFloat_INTR(const v_int32x4& num) inline v_float32x4 tsdfToFloat_INTR(const v_int32x4& num)
{ {
v_float32x4 num128 = v_setall_f32(-1.f / 128.f); v_float32x4 num128 = v_setall_f32(-1.f / 128.f);
return v_cvt_f32(num) * num128; return v_cvt_f32(num) * num128;
} }
#endif
inline TsdfType floatToTsdf(float num) inline TsdfType floatToTsdf(float num)
{ {

@ -638,7 +638,7 @@ public:
return errors_cache; return errors_cache;
int i = 0; int i = 0;
#ifdef CV_SIMD #if CV_SIMD
v_float32 v_a = vx_setall_f32(a); v_float32 v_a = vx_setall_f32(a);
v_float32 v_b = vx_setall_f32(b); v_float32 v_b = vx_setall_f32(b);
v_float32 v_c = vx_setall_f32(c); v_float32 v_c = vx_setall_f32(c);
@ -718,7 +718,7 @@ public:
return errors_cache; return errors_cache;
int i = 0; int i = 0;
#ifdef CV_SIMD #if CV_SIMD
v_float32 v_center_x = vx_setall_f32(center_x); v_float32 v_center_x = vx_setall_f32(center_x);
v_float32 v_center_y = vx_setall_f32(center_y); v_float32 v_center_y = vx_setall_f32(center_y);
v_float32 v_center_z = vx_setall_f32(center_z); v_float32 v_center_z = vx_setall_f32(center_z);

@ -720,7 +720,7 @@ namespace CV__SIMD_NAMESPACE {
inline v_int32 vx_load_expand_q(const schar * ptr) { return VXPREFIX(_load_expand_q)(ptr); } inline v_int32 vx_load_expand_q(const schar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
//! @} //! @}
#ifndef OPENCV_HAL_HAVE_LOAD_STORE_BFLOAT16 #ifndef OPENCV_HAL_HAVE_PACK_STORE_BFLOAT16
inline v_float32 vx_load_expand(const bfloat16_t* ptr) inline v_float32 vx_load_expand(const bfloat16_t* ptr)
{ {
v_uint32 v = vx_load_expand((const ushort*)ptr); v_uint32 v = vx_load_expand((const ushort*)ptr);
@ -730,7 +730,7 @@ namespace CV__SIMD_NAMESPACE {
inline void v_pack_store(const bfloat16_t* ptr, const v_float32& v) inline void v_pack_store(const bfloat16_t* ptr, const v_float32& v)
{ {
v_int32 iv = v_shr<16>(v_reinterpret_as_s32(v)); v_int32 iv = v_shr<16>(v_reinterpret_as_s32(v));
v_pack_store((short*)ptr, iv); cv::v_pack_store((short*)ptr, iv);
} }
#endif #endif
@ -967,6 +967,9 @@ namespace CV__SIMD_NAMESPACE {
OPENCV_HAL_WRAP_CMP(v_float64x4) OPENCV_HAL_WRAP_CMP(v_float64x4)
#endif #endif
#endif #endif
OPENCV_HAL_WRAP_CMP_OP(v_int64, lt, <) \
OPENCV_HAL_WRAP_CMP_OP(v_int64, gt, >) \
//////////// get0 //////////// //////////// get0 ////////////
#define OPENCV_HAL_WRAP_GRT0(_Tpvec) \ #define OPENCV_HAL_WRAP_GRT0(_Tpvec) \
@ -1110,7 +1113,7 @@ namespace CV__SIMD_NAMESPACE {
#define CV_SIMD 0 #define CV_SIMD 0
#endif #endif
#if (!defined CV_SIMD_64F) || (!CV_SIMD_64F) #if !CV_SIMD_64F && !CV_SIMD_SCALABLE_64F
typedef struct v_float64 { int dummy; } v_float64; typedef struct v_float64 { int dummy; } v_float64;
#endif #endif

@ -7,6 +7,7 @@
#define SRC_CONVERT_HPP #define SRC_CONVERT_HPP
#include "opencv2/core/types.hpp" #include "opencv2/core/types.hpp"
#include "opencv2/core/hal/intrin.hpp"
namespace cv namespace cv
{ {
@ -32,11 +33,11 @@ static inline void vx_load_as(const unsigned* ptr, v_float32& a)
{ {
v_uint32 delta = vx_setall_u32(0x80000000U); v_uint32 delta = vx_setall_u32(0x80000000U);
v_uint32 ua = vx_load(ptr); v_uint32 ua = vx_load(ptr);
v_uint32 mask_a = (ua >= delta) & delta; v_uint32 mask_a = v_and(v_ge(ua, delta), delta);
v_float32 fmask_a = v_cvt_f32(v_reinterpret_as_s32(mask_a)); // 0.f or (float)(-(1 << 31)) v_float32 fmask_a = v_cvt_f32(v_reinterpret_as_s32(mask_a)); // 0.f or (float)(-(1 << 31))
a = v_cvt_f32(v_reinterpret_as_s32(ua - mask_a)); a = v_cvt_f32(v_reinterpret_as_s32(v_sub(ua, mask_a)));
// restore the original values // restore the original values
a -= fmask_a; // subtract 0 or a large negative number a = v_sub(a, fmask_a); // subtract 0 or a large negative number
} }
static inline void vx_load_as(const float* ptr, v_float32& a) static inline void vx_load_as(const float* ptr, v_float32& a)
@ -200,8 +201,8 @@ static inline void vx_load_pair_as(const int64_t* ptr, v_uint64& a, v_uint64& b)
{ {
v_int64 z = vx_setzero_s64(); v_int64 z = vx_setzero_s64();
v_int64 ia = vx_load(ptr), ib = vx_load(ptr + VTraits<v_uint64>::vlanes()); v_int64 ia = vx_load(ptr), ib = vx_load(ptr + VTraits<v_uint64>::vlanes());
ia &= (ia > z); ia = v_and(ia, v_gt(ia, z));
ib &= (ib > z); ib = v_and(ib, v_gt(ib, z));
a = v_reinterpret_as_u64(ia); a = v_reinterpret_as_u64(ia);
b = v_reinterpret_as_u64(ib); b = v_reinterpret_as_u64(ib);
} }
@ -212,10 +213,10 @@ static inline void vx_load_pair_as(const int64_t* ptr, v_uint32& a, v_uint32& b)
v_int64 z = vx_setzero_s64(); v_int64 z = vx_setzero_s64();
v_int64 ia0 = vx_load(ptr), ia1 = vx_load(ptr + nlanes); v_int64 ia0 = vx_load(ptr), ia1 = vx_load(ptr + nlanes);
v_int64 ib0 = vx_load(ptr + nlanes*2), ib1 = vx_load(ptr + nlanes*3); v_int64 ib0 = vx_load(ptr + nlanes*2), ib1 = vx_load(ptr + nlanes*3);
ia0 &= (ia0 > z); ia0 = v_and(ia0, v_gt(ia0, z));
ia1 &= (ia1 > z); ia1 = v_and(ia1, v_gt(ia1, z));
ib0 &= (ib0 > z); ib0 = v_and(ib0, v_gt(ib0, z));
ib1 &= (ib1 > z); ib1 = v_and(ib1, v_gt(ib1, z));
a = v_pack(v_reinterpret_as_u64(ia0), v_reinterpret_as_u64(ia1)); a = v_pack(v_reinterpret_as_u64(ia0), v_reinterpret_as_u64(ia1));
b = v_pack(v_reinterpret_as_u64(ib0), v_reinterpret_as_u64(ib1)); b = v_pack(v_reinterpret_as_u64(ib0), v_reinterpret_as_u64(ib1));
} }
@ -246,7 +247,7 @@ static inline void vx_load_pair_as(const bool* ptr, v_float32& a, v_float32& b)
{ {
v_uint16 z = vx_setzero_u16(); v_uint16 z = vx_setzero_u16();
v_uint16 uab = vx_load_expand((const uchar*)ptr); v_uint16 uab = vx_load_expand((const uchar*)ptr);
uab = v_shr<15>(uab > z); uab = v_shr<15>(v_gt(uab, z));
v_int32 ia, ib; v_int32 ia, ib;
v_expand(v_reinterpret_as_s16(uab), ia, ib); v_expand(v_reinterpret_as_s16(uab), ia, ib);
a = v_cvt_f32(ia); a = v_cvt_f32(ia);
@ -257,7 +258,7 @@ static inline void vx_load_as(const bool* ptr, v_float32& a)
{ {
v_uint32 z = vx_setzero_u32(); v_uint32 z = vx_setzero_u32();
v_uint32 ua = vx_load_expand_q((const uchar*)ptr); v_uint32 ua = vx_load_expand_q((const uchar*)ptr);
ua = v_shr<31>(ua > z); ua = v_shr<31>(v_gt(ua, z));
a = v_cvt_f32(v_reinterpret_as_s32(ua)); a = v_cvt_f32(v_reinterpret_as_s32(ua));
} }
@ -330,14 +331,14 @@ static inline void vx_load_pair_as(const unsigned* ptr, v_float32& a, v_float32&
v_uint32 delta = vx_setall_u32(0x80000000U); v_uint32 delta = vx_setall_u32(0x80000000U);
v_uint32 ua = vx_load(ptr); v_uint32 ua = vx_load(ptr);
v_uint32 ub = vx_load(ptr + VTraits<v_uint32>::vlanes()); v_uint32 ub = vx_load(ptr + VTraits<v_uint32>::vlanes());
v_uint32 mask_a = (ua >= delta) & delta, mask_b = (ub >= delta) & delta; v_uint32 mask_a = v_and(v_ge(ua, delta), delta), mask_b = v_and(v_ge(ub, delta), delta);
v_float32 fmask_a = v_cvt_f32(v_reinterpret_as_s32(mask_a)); // 0.f or (float)(-(1 << 31)) v_float32 fmask_a = v_cvt_f32(v_reinterpret_as_s32(mask_a)); // 0.f or (float)(-(1 << 31))
v_float32 fmask_b = v_cvt_f32(v_reinterpret_as_s32(mask_b)); // 0.f or (float)(-(1 << 31)) v_float32 fmask_b = v_cvt_f32(v_reinterpret_as_s32(mask_b)); // 0.f or (float)(-(1 << 31))
a = v_cvt_f32(v_reinterpret_as_s32(ua - mask_a)); a = v_cvt_f32(v_reinterpret_as_s32(v_sub(ua, mask_a)));
b = v_cvt_f32(v_reinterpret_as_s32(ub - mask_b)); b = v_cvt_f32(v_reinterpret_as_s32(v_sub(ub, mask_b)));
// restore the original values // restore the original values
a -= fmask_a; // subtract 0 or a large negative number a = v_sub(a, fmask_a); // subtract 0 or a large negative number
b -= fmask_b; // subtract 0 or a large negative number b = v_sub(b, fmask_b); // subtract 0 or a large negative number
} }
static inline void v_store_pair_as(uchar* ptr, const v_uint16& a, const v_uint16& b) static inline void v_store_pair_as(uchar* ptr, const v_uint16& a, const v_uint16& b)
@ -403,8 +404,8 @@ static inline void v_store_pair_as(schar* ptr, const v_float32& a, const v_float
static inline void v_store_pair_as(bool* ptr, const v_float32& a, const v_float32& b) static inline void v_store_pair_as(bool* ptr, const v_float32& a, const v_float32& b)
{ {
v_float32 z = vx_setzero_f32(); v_float32 z = vx_setzero_f32();
v_uint32 ma = v_shr<31>(v_reinterpret_as_u32(a != z)); v_uint32 ma = v_shr<31>(v_reinterpret_as_u32(v_ne(a, z)));
v_uint32 mb = v_shr<31>(v_reinterpret_as_u32(b != z)); v_uint32 mb = v_shr<31>(v_reinterpret_as_u32(v_ne(b, z)));
v_uint16 mab = v_pack(ma, mb); v_uint16 mab = v_pack(ma, mb);
v_pack_store((uchar*)ptr, mab); v_pack_store((uchar*)ptr, mab);
} }
@ -494,7 +495,7 @@ static inline void vx_load_pair_as(const bool* ptr, v_float64& a, v_float64& b)
{ {
v_uint32 z = vx_setzero_u32(); v_uint32 z = vx_setzero_u32();
v_uint32 uab = vx_load_expand_q((const uchar*)ptr); v_uint32 uab = vx_load_expand_q((const uchar*)ptr);
uab = v_shr<31>(uab > z); uab = v_shr<31>(v_gt(uab, z));
v_float32 fab = v_cvt_f32(v_reinterpret_as_s32(uab)); v_float32 fab = v_cvt_f32(v_reinterpret_as_s32(uab));
a = v_cvt_f64(fab); a = v_cvt_f64(fab);
b = v_cvt_f64_high(fab); b = v_cvt_f64_high(fab);

Loading…
Cancel
Save