Merge pull request #24936 from mshabunin:fix-rvv07-scale64f

RISC-V: fix scale64f performance for RVV 0.7
pull/24927/head
Alexander Smorkalov 10 months ago committed by GitHub
commit 54b7cafd2a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 14
      modules/core/include/opencv2/core/hal/intrin_rvv071.hpp

@@ -490,12 +490,12 @@ inline v_float32x4 v_sqr_magnitude(const v_float32x4& a, const v_float32x4& b)
// Fused multiply-add: returns a * b + c (element-wise, 4 x float32).
// NOTE(review): the scraped diff kept both the removed vfmacc form and the
// added vfmadd form, leaving an unreachable second return; only the intended
// post-change vfmadd form is kept. vfmadd.vv computes vd*vs1 + vs2, so
// vfmadd(a, b, c) == a*b + c — same result as the old vfmacc(c, a, b),
// chosen for better performance on RVV 0.7 hardware.
inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c)
{
    return v_float32x4(vfmadd_vv_f32m1(a.val, b.val, c.val, 4));
}
// Integer fused multiply-add: returns a * b + c (element-wise, 4 x int32).
// NOTE(review): the scraped diff kept both the removed vmacc form and the
// added vmadd form, leaving an unreachable second return; only the intended
// post-change vmadd form is kept. vmadd.vv computes vd*vs1 + vs2, so
// vmadd(a, b, c) == a*b + c — same result as the old vmacc(c, a, b).
inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
{
    return v_int32x4(vmadd_vv_i32m1(a.val, b.val, c.val, 4));
}
inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c)
@@ -553,7 +553,7 @@ inline v_float64x2 v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b)
// Fused multiply-add: returns a * b + c (element-wise, 2 x float64).
// NOTE(review): the scraped diff kept both the removed vfmacc form and the
// added vfmadd form, leaving an unreachable second return; only the intended
// post-change vfmadd form is kept. vfmadd.vv computes vd*vs1 + vs2, so
// vfmadd(a, b, c) == a*b + c — same result as the old vfmacc(c, a, b),
// chosen for better scale64f performance on RVV 0.7 hardware.
inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
{
    return v_float64x2(vfmadd_vv_f64m1(a.val, b.val, c.val, 2));
}
inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
@@ -1429,7 +1429,7 @@ inline _Tpvec v_load_low(const _Tp* ptr) \
inline _Tpvec v_load_aligned(const _Tp* ptr) \
{ return _Tpvec(vreinterpret_v_##ldst_len##_##len(vle8_v_##ldst_len((ldst_type *)ptr, 16))); } \
inline _Tpvec v_load(const _Tp* ptr) \
{ return _Tpvec(vreinterpret_v_##ldst_len##_##len(vle8_v_##ldst_len((ldst_type *)ptr, 16))); } \
{ return _Tpvec(vle##elemsize##_v_##len(ptr, num)); } \
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
{ vse8_v_##ldst_len((ldst_type *)ptr, vreinterpret_v_##len##_##ldst_len(a.val), 8);}\
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
@@ -1438,7 +1438,7 @@ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
a0 = vslidedown_vx_##len(a0, a.val, hnum, num); \
vse8_v_##ldst_len((ldst_type *)ptr, vreinterpret_v_##len##_##ldst_len(a0), 8);}\
inline void v_store(_Tp* ptr, const _Tpvec& a) \
{ vse8_v_##ldst_len((ldst_type *)ptr, vreinterpret_v_##len##_##ldst_len(a.val), 16); } \
{ vse##elemsize##_v_##len(ptr, a.val, num); } \
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
{ vse8_v_##ldst_len((ldst_type *)ptr, vreinterpret_v_##len##_##ldst_len(a.val), 16); } \
inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
@@ -1469,7 +1469,7 @@ inline _Tpvec v_load_low(const _Tp* ptr) \
inline _Tpvec v_load_aligned(const _Tp* ptr) \
{ return _Tpvec(vreinterpret_v_u##elemsize##m1_##len(vreinterpret_v_u8m1_u##elemsize##m1(vle8_v_u8m1((uchar *)ptr, 16)))); } \
inline _Tpvec v_load(const _Tp* ptr) \
{ return _Tpvec(vreinterpret_v_u##elemsize##m1_##len(vreinterpret_v_u8m1_u##elemsize##m1(vle8_v_u8m1((uchar *)ptr, 16)))); } \
{ return _Tpvec(vle##elemsize##_v_##len(ptr, num)); } \
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
{ vse8_v_u8m1((uchar *)ptr, vreinterpret_v_u##elemsize##m1_u8m1(vreinterpret_v_##len##_u##elemsize##m1(a.val)), 8);}\
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
@@ -1478,7 +1478,7 @@ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
a0 = vslidedown_vx_##len(a0, a.val, hnum, num); \
vse8_v_u8m1((uchar *)ptr, vreinterpret_v_u##elemsize##m1_u8m1(vreinterpret_v_##len##_u##elemsize##m1(a0)), 8);}\
inline void v_store(_Tp* ptr, const _Tpvec& a) \
{ vse8_v_u8m1((uchar *)ptr, vreinterpret_v_u##elemsize##m1_u8m1(vreinterpret_v_##len##_u##elemsize##m1(a.val)), 16); } \
{ vse##elemsize##_v_##len(ptr, a.val, num); } \
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
{ vse8_v_u8m1((uchar *)ptr, vreinterpret_v_u##elemsize##m1_u8m1(vreinterpret_v_##len##_u##elemsize##m1(a.val)), 16); } \
inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \

Loading…
Cancel
Save