@@ -104,7 +104,7 @@ template<typename _Tp> struct V_TypeTraits
 {
 };
 
-#define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_, nlanes128_) \
+#define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_) \
 template<> struct V_TypeTraits<type> \
 { \
     typedef type value_type; \
@@ -114,7 +114,6 @@ template<typename _Tp> struct V_TypeTraits
     typedef w_type_ w_type; \
     typedef q_type_ q_type; \
     typedef sum_type_ sum_type; \
-    enum { nlanes128 = nlanes128_ }; \
 \
     static inline int_type reinterpret_int(type x) \
     { \
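
The two hunks above drop the compile-time nlanes128 constant from V_TypeTraits; what remains is the mapping from an element type to its companion types (int_type for bit reinterpretation, w_type/q_type for 2x/4x widening, sum_type for accumulation). A minimal sketch of how the surviving traits are typically consumed, assuming the surrounding cv namespace is in scope (sum_as_wider is a hypothetical helper, not part of the header):

    // Accumulate input elements in the wider sum_type selected by the traits,
    // e.g. unsigned for uchar input, float for float input.
    template<typename T>
    typename V_TypeTraits<T>::sum_type sum_as_wider(const T* data, int n)
    {
        typedef typename V_TypeTraits<T>::sum_type sum_t;
        sum_t s = 0;
        for (int i = 0; i < n; i++)
            s += (sum_t)data[i];
        return s;
    }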
@@ -131,7 +130,7 @@ template<typename _Tp> struct V_TypeTraits
     } \
 }
 
-#define CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(type, int_type_, uint_type_, abs_type_, w_type_, sum_type_, nlanes128_) \
+#define CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(type, int_type_, uint_type_, abs_type_, w_type_, sum_type_) \
 template<> struct V_TypeTraits<type> \
 { \
     typedef type value_type; \
@@ -140,7 +139,6 @@ template<typename _Tp> struct V_TypeTraits
     typedef uint_type_ uint_type; \
     typedef w_type_ w_type; \
     typedef sum_type_ sum_type; \
-    enum { nlanes128 = nlanes128_ }; \
 \
     static inline int_type reinterpret_int(type x) \
     { \
@@ -157,16 +155,16 @@ template<typename _Tp> struct V_TypeTraits
     } \
 }
 
-CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned, 16);
+CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned);
-CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int, 16);
+CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int);
-CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned, 8);
+CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned);
-CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int, 8);
+CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int);
-CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(unsigned, int, unsigned, unsigned, uint64, unsigned, 4);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(unsigned, int, unsigned, unsigned, uint64, unsigned);
-CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int, int, unsigned, unsigned, int64, int, 4);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int, int, unsigned, unsigned, int64, int);
-CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(float, int, unsigned, float, double, float, 4);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(float, int, unsigned, float, double, float);
-CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(uint64, int64, uint64, uint64, void, uint64, 2);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(uint64, int64, uint64, uint64, void, uint64);
-CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int64, int64, uint64, uint64, void, int64, 2);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int64, int64, uint64, uint64, void, int64);
-CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(double, int64, uint64, double, void, double, 2);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(double, int64, uint64, double, void, double);
 
 #ifndef CV_DOXYGEN
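
The instantiations above encode the widening ladder (uchar -> ushort -> unsigned, short -> int -> int64, float -> double, and no w_type at all for 64-bit elements). A few of those mappings spelled out as compile-time checks, as a sketch (assumes C++11 <type_traits> and that V_TypeTraits is in scope):

    static_assert(std::is_same<V_TypeTraits<uchar>::w_type, ushort>::value, "uchar widens to ushort");
    static_assert(std::is_same<V_TypeTraits<short>::q_type, int64>::value, "short quad-widens to int64");
    static_assert(std::is_same<V_TypeTraits<float>::w_type, double>::value, "float widens to double");
    static_assert(std::is_same<V_TypeTraits<float>::sum_type, float>::value, "float accumulates as float");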
@@ -314,54 +312,6 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
 //==================================================================================================
 
-#define CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \
-    inline vtyp vx_setall_##short_typ(typ v) { return prefix##_setall_##short_typ(v); } \
-    inline vtyp vx_setzero_##short_typ() { return prefix##_setzero_##short_typ(); } \
-    inline vtyp vx_##loadsfx(const typ* ptr) { return prefix##_##loadsfx(ptr); } \
-    inline vtyp vx_##loadsfx##_aligned(const typ* ptr) { return prefix##_##loadsfx##_aligned(ptr); } \
-    inline vtyp vx_##loadsfx##_low(const typ* ptr) { return prefix##_##loadsfx##_low(ptr); } \
-    inline vtyp vx_##loadsfx##_halves(const typ* ptr0, const typ* ptr1) { return prefix##_##loadsfx##_halves(ptr0, ptr1); } \
-    inline void vx_store(typ* ptr, const vtyp& v) { return v_store(ptr, v); } \
-    inline void vx_store_aligned(typ* ptr, const vtyp& v) { return v_store_aligned(ptr, v); } \
-    inline vtyp vx_lut(const typ* ptr, const int* idx) { return prefix##_lut(ptr, idx); } \
-    inline vtyp vx_lut_pairs(const typ* ptr, const int* idx) { return prefix##_lut_pairs(ptr, idx); }
-
-#define CV_INTRIN_DEFINE_WIDE_LUT_QUAD(typ, vtyp, prefix) \
-    inline vtyp vx_lut_quads(const typ* ptr, const int* idx) { return prefix##_lut_quads(ptr, idx); }
-
-#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \
-    inline wtyp vx_load_expand(const typ* ptr) { return prefix##_load_expand(ptr); }
-
-#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix) \
-    inline qtyp vx_load_expand_q(const typ* ptr) { return prefix##_load_expand_q(ptr); }
-
-#define CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(typ, vtyp, short_typ, wtyp, qtyp, prefix, loadsfx) \
-    CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \
-    CV_INTRIN_DEFINE_WIDE_LUT_QUAD(typ, vtyp, prefix) \
-    CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \
-    CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix)
-
-#define CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(prefix) \
-    CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(uchar, v_uint8, u8, v_uint16, v_uint32, prefix, load) \
-    CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(schar, v_int8, s8, v_int16, v_int32, prefix, load) \
-    CV_INTRIN_DEFINE_WIDE_INTRIN(ushort, v_uint16, u16, prefix, load) \
-    CV_INTRIN_DEFINE_WIDE_LUT_QUAD(ushort, v_uint16, prefix) \
-    CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(ushort, v_uint32, prefix) \
-    CV_INTRIN_DEFINE_WIDE_INTRIN(short, v_int16, s16, prefix, load) \
-    CV_INTRIN_DEFINE_WIDE_LUT_QUAD(short, v_int16, prefix) \
-    CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(short, v_int32, prefix) \
-    CV_INTRIN_DEFINE_WIDE_INTRIN(int, v_int32, s32, prefix, load) \
-    CV_INTRIN_DEFINE_WIDE_LUT_QUAD(int, v_int32, prefix) \
-    CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(int, v_int64, prefix) \
-    CV_INTRIN_DEFINE_WIDE_INTRIN(unsigned, v_uint32, u32, prefix, load) \
-    CV_INTRIN_DEFINE_WIDE_LUT_QUAD(unsigned, v_uint32, prefix) \
-    CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(unsigned, v_uint64, prefix) \
-    CV_INTRIN_DEFINE_WIDE_INTRIN(float, v_float32, f32, prefix, load) \
-    CV_INTRIN_DEFINE_WIDE_LUT_QUAD(float, v_float32, prefix) \
-    CV_INTRIN_DEFINE_WIDE_INTRIN(int64, v_int64, s64, prefix, load) \
-    CV_INTRIN_DEFINE_WIDE_INTRIN(uint64, v_uint64, u64, prefix, load) \
-    CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(float16_t, v_float32, prefix)
-
 template<typename _Tp> struct V_RegTraits
 {
 };
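
For reference, what one expansion of the removed helper used to generate: CV_INTRIN_DEFINE_WIDE_INTRIN(uchar, v_uint8, u8, v256, load) token-pasted wrappers of the form below (abridged preprocessor output, reconstructed by hand):

    inline v_uint8 vx_setall_u8(uchar v) { return v256_setall_u8(v); }
    inline v_uint8 vx_setzero_u8() { return v256_setzero_u8(); }
    inline v_uint8 vx_load(const uchar* ptr) { return v256_load(ptr); }
    inline v_uint8 vx_load_aligned(const uchar* ptr) { return v256_load_aligned(ptr); }
    // ...plus load_low, load_halves, store, store_aligned, lut and lut_pairs wrappers.

The hunks below replace this macro machinery with the same vx_ functions written out once (dispatched through VXPREFIX), which lets Doxygen see and document them.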
@@ -421,6 +371,7 @@ template<typename _Tp> struct V_RegTraits
 CV_DEF_REG_TRAITS(v512, v_int64x8, int64, s64, v_uint64x8, void, void, v_int64x8, void);
 CV_DEF_REG_TRAITS(v512, v_float64x8, double, f64, v_float64x8, void, void, v_int64x8, v_int32x16);
 #endif
+//! @endcond
 
 #if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512)
 #define CV__SIMD_NAMESPACE simd512
@@ -429,21 +380,33 @@ namespace CV__SIMD_NAMESPACE {
 #define CV_SIMD_64F CV_SIMD512_64F
 #define CV_SIMD_FP16 CV_SIMD512_FP16
 #define CV_SIMD_WIDTH 64
+//! @addtogroup core_hal_intrin
+//! @{
+//! @brief Maximum available vector register capacity 8-bit unsigned integer values
 typedef v_uint8x64 v_uint8;
+//! @brief Maximum available vector register capacity 8-bit signed integer values
 typedef v_int8x64 v_int8;
+//! @brief Maximum available vector register capacity 16-bit unsigned integer values
 typedef v_uint16x32 v_uint16;
+//! @brief Maximum available vector register capacity 16-bit signed integer values
 typedef v_int16x32 v_int16;
+//! @brief Maximum available vector register capacity 32-bit unsigned integer values
 typedef v_uint32x16 v_uint32;
+//! @brief Maximum available vector register capacity 32-bit signed integer values
 typedef v_int32x16 v_int32;
+//! @brief Maximum available vector register capacity 64-bit unsigned integer values
 typedef v_uint64x8 v_uint64;
+//! @brief Maximum available vector register capacity 64-bit signed integer values
 typedef v_int64x8 v_int64;
+//! @brief Maximum available vector register capacity 32-bit floating point values (single precision)
 typedef v_float32x16 v_float32;
-CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v512)
 #if CV_SIMD512_64F
+//! @brief Maximum available vector register capacity 64-bit floating point values (double precision)
 typedef v_float64x8 v_float64;
-CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v512, load)
 #endif
-inline void vx_cleanup() { v512_cleanup(); }
+//! @}
+
+#define VXPREFIX(func) v512##func
 } // namespace
 using namespace CV__SIMD_NAMESPACE;
 #elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256)
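
These typedefs are what make the wide API width-agnostic: the same source compiles against 128-, 256- or 512-bit registers depending on which branch is selected. A hedged sketch of the pattern this enables (assumes a build with CV_SIMD nonzero; scale is a hypothetical function):

    // v_float32 is v_float32x4/x8/x16 here, so v_float32::nlanes and the
    // vx_* calls adapt to the selected register width automatically.
    void scale(float* dst, const float* src, float k, int n)
    {
        int i = 0;
        const v_float32 vk = vx_setall_f32(k);
        for (; i <= n - v_float32::nlanes; i += v_float32::nlanes)
            v_store(dst + i, vx_load(src + i) * vk);
        for (; i < n; i++) // scalar tail
            dst[i] = src[i] * k;
    }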
@@ -453,21 +416,33 @@ namespace CV__SIMD_NAMESPACE {
 #define CV_SIMD_64F CV_SIMD256_64F
 #define CV_SIMD_FP16 CV_SIMD256_FP16
 #define CV_SIMD_WIDTH 32
+//! @addtogroup core_hal_intrin
+//! @{
+//! @brief Maximum available vector register capacity 8-bit unsigned integer values
 typedef v_uint8x32 v_uint8;
+//! @brief Maximum available vector register capacity 8-bit signed integer values
 typedef v_int8x32 v_int8;
+//! @brief Maximum available vector register capacity 16-bit unsigned integer values
 typedef v_uint16x16 v_uint16;
+//! @brief Maximum available vector register capacity 16-bit signed integer values
 typedef v_int16x16 v_int16;
+//! @brief Maximum available vector register capacity 32-bit unsigned integer values
 typedef v_uint32x8 v_uint32;
+//! @brief Maximum available vector register capacity 32-bit signed integer values
 typedef v_int32x8 v_int32;
+//! @brief Maximum available vector register capacity 64-bit unsigned integer values
 typedef v_uint64x4 v_uint64;
+//! @brief Maximum available vector register capacity 64-bit signed integer values
 typedef v_int64x4 v_int64;
+//! @brief Maximum available vector register capacity 32-bit floating point values (single precision)
 typedef v_float32x8 v_float32;
-CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v256)
 #if CV_SIMD256_64F
+//! @brief Maximum available vector register capacity 64-bit floating point values (double precision)
 typedef v_float64x4 v_float64;
-CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v256, load)
 #endif
-inline void vx_cleanup() { v256_cleanup(); }
+//! @}
+
+#define VXPREFIX(func) v256##func
 } // namespace
 using namespace CV__SIMD_NAMESPACE;
 #elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128)
@@ -480,25 +455,228 @@ namespace CV__SIMD_NAMESPACE {
 #define CV_SIMD CV_SIMD128
 #define CV_SIMD_64F CV_SIMD128_64F
 #define CV_SIMD_WIDTH 16
+//! @addtogroup core_hal_intrin
+//! @{
+//! @brief Maximum available vector register capacity 8-bit unsigned integer values
 typedef v_uint8x16 v_uint8;
+//! @brief Maximum available vector register capacity 8-bit signed integer values
 typedef v_int8x16 v_int8;
+//! @brief Maximum available vector register capacity 16-bit unsigned integer values
 typedef v_uint16x8 v_uint16;
+//! @brief Maximum available vector register capacity 16-bit signed integer values
 typedef v_int16x8 v_int16;
+//! @brief Maximum available vector register capacity 32-bit unsigned integer values
 typedef v_uint32x4 v_uint32;
+//! @brief Maximum available vector register capacity 32-bit signed integer values
 typedef v_int32x4 v_int32;
+//! @brief Maximum available vector register capacity 64-bit unsigned integer values
 typedef v_uint64x2 v_uint64;
+//! @brief Maximum available vector register capacity 64-bit signed integer values
 typedef v_int64x2 v_int64;
+//! @brief Maximum available vector register capacity 32-bit floating point values (single precision)
 typedef v_float32x4 v_float32;
-CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v)
 #if CV_SIMD128_64F
+//! @brief Maximum available vector register capacity 64-bit floating point values (double precision)
 typedef v_float64x2 v_float64;
-CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v, load)
 #endif
-inline void vx_cleanup() { v_cleanup(); }
+//! @}
+
+#define VXPREFIX(func) v##func
 } // namespace
 using namespace CV__SIMD_NAMESPACE;
 #endif
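
VXPREFIX is the piece that lets the block below define every vx_ function once for all three widths: VXPREFIX(_load)(ptr) expands to v_load(ptr), v256_load(ptr) or v512_load(ptr) depending on which branch above was taken. The same token-pasting trick in isolation, with hypothetical names:

    #define MYPREFIX(fn) impl256##fn                    // selected per build, like VXPREFIX
    inline int impl256_answer() { return 42; }
    inline int answer() { return MYPREFIX(_answer)(); } // expands to impl256_answer()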
+
+namespace CV__SIMD_NAMESPACE {
+//! @addtogroup core_hal_intrin
+//! @{
+    //! @name Wide init with value
+    //! @{
+    //! @brief Create maximum available capacity vector with elements set to a specific value
+    inline v_uint8 vx_setall_u8(uchar v) { return VXPREFIX(_setall_u8)(v); }
+    inline v_int8 vx_setall_s8(schar v) { return VXPREFIX(_setall_s8)(v); }
+    inline v_uint16 vx_setall_u16(ushort v) { return VXPREFIX(_setall_u16)(v); }
+    inline v_int16 vx_setall_s16(short v) { return VXPREFIX(_setall_s16)(v); }
+    inline v_int32 vx_setall_s32(int v) { return VXPREFIX(_setall_s32)(v); }
+    inline v_uint32 vx_setall_u32(unsigned v) { return VXPREFIX(_setall_u32)(v); }
+    inline v_float32 vx_setall_f32(float v) { return VXPREFIX(_setall_f32)(v); }
+    inline v_int64 vx_setall_s64(int64 v) { return VXPREFIX(_setall_s64)(v); }
+    inline v_uint64 vx_setall_u64(uint64 v) { return VXPREFIX(_setall_u64)(v); }
+#if CV_SIMD_64F
+    inline v_float64 vx_setall_f64(double v) { return VXPREFIX(_setall_f64)(v); }
+#endif
+    //! @}
+
+    //! @name Wide init with zero
+    //! @{
+    //! @brief Create maximum available capacity vector with elements set to zero
+    inline v_uint8 vx_setzero_u8() { return VXPREFIX(_setzero_u8)(); }
+    inline v_int8 vx_setzero_s8() { return VXPREFIX(_setzero_s8)(); }
+    inline v_uint16 vx_setzero_u16() { return VXPREFIX(_setzero_u16)(); }
+    inline v_int16 vx_setzero_s16() { return VXPREFIX(_setzero_s16)(); }
+    inline v_int32 vx_setzero_s32() { return VXPREFIX(_setzero_s32)(); }
+    inline v_uint32 vx_setzero_u32() { return VXPREFIX(_setzero_u32)(); }
+    inline v_float32 vx_setzero_f32() { return VXPREFIX(_setzero_f32)(); }
+    inline v_int64 vx_setzero_s64() { return VXPREFIX(_setzero_s64)(); }
+    inline v_uint64 vx_setzero_u64() { return VXPREFIX(_setzero_u64)(); }
+#if CV_SIMD_64F
+    inline v_float64 vx_setzero_f64() { return VXPREFIX(_setzero_f64)(); }
+#endif
+    //! @}
+
+    //! @name Wide load from memory
+    //! @{
+    //! @brief Load maximum available capacity register contents from memory
+    inline v_uint8 vx_load(const uchar * ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_int8 vx_load(const schar * ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_uint16 vx_load(const ushort * ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_int16 vx_load(const short * ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_int32 vx_load(const int * ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_uint32 vx_load(const unsigned * ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_float32 vx_load(const float * ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_int64 vx_load(const int64 * ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_uint64 vx_load(const uint64 * ptr) { return VXPREFIX(_load)(ptr); }
+#if CV_SIMD_64F
+    inline v_float64 vx_load(const double * ptr) { return VXPREFIX(_load)(ptr); }
+#endif
+    //! @}
+
+    //! @name Wide load from memory(aligned)
+    //! @{
+    //! @brief Load maximum available capacity register contents from memory(aligned)
+    inline v_uint8 vx_load_aligned(const uchar * ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_int8 vx_load_aligned(const schar * ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_uint16 vx_load_aligned(const ushort * ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_int16 vx_load_aligned(const short * ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_int32 vx_load_aligned(const int * ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_uint32 vx_load_aligned(const unsigned * ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_float32 vx_load_aligned(const float * ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_int64 vx_load_aligned(const int64 * ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_uint64 vx_load_aligned(const uint64 * ptr) { return VXPREFIX(_load_aligned)(ptr); }
+#if CV_SIMD_64F
+    inline v_float64 vx_load_aligned(const double * ptr) { return VXPREFIX(_load_aligned)(ptr); }
+#endif
+    //! @}
+
+    //! @name Wide load lower half from memory
+    //! @{
+    //! @brief Load lower half of maximum available capacity register from memory
+    inline v_uint8 vx_load_low(const uchar * ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_int8 vx_load_low(const schar * ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_uint16 vx_load_low(const ushort * ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_int16 vx_load_low(const short * ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_int32 vx_load_low(const int * ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_uint32 vx_load_low(const unsigned * ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_float32 vx_load_low(const float * ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_int64 vx_load_low(const int64 * ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_uint64 vx_load_low(const uint64 * ptr) { return VXPREFIX(_load_low)(ptr); }
+#if CV_SIMD_64F
+    inline v_float64 vx_load_low(const double * ptr) { return VXPREFIX(_load_low)(ptr); }
+#endif
+    //! @}
+
+    //! @name Wide load halfs from memory
+    //! @{
+    //! @brief Load maximum available capacity register contents from two memory blocks
+    inline v_uint8 vx_load_halves(const uchar * ptr0, const uchar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_int8 vx_load_halves(const schar * ptr0, const schar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_uint16 vx_load_halves(const ushort * ptr0, const ushort * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_int16 vx_load_halves(const short * ptr0, const short * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_int32 vx_load_halves(const int * ptr0, const int * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_uint32 vx_load_halves(const unsigned * ptr0, const unsigned * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_float32 vx_load_halves(const float * ptr0, const float * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_int64 vx_load_halves(const int64 * ptr0, const int64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_uint64 vx_load_halves(const uint64 * ptr0, const uint64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+#if CV_SIMD_64F
+    inline v_float64 vx_load_halves(const double * ptr0, const double * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+#endif
+    //! @}
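
vx_load_low and vx_load_halves fill a wide register from half-register chunks, which helps when data comes from two separate rows or buffers. A small hedged sketch (combine_rows is hypothetical):

    // rowA fills the low half of the register, rowB the high half,
    // so one vector op can process a pair of half-width rows.
    inline v_uint8 combine_rows(const uchar* rowA, const uchar* rowB)
    {
        return vx_load_halves(rowA, rowB);
    }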
+    //! @name Wide LUT of elements
+    //! @{
+    //! @brief Load maximum available capacity register contents with array elements by provided indexes
+    inline v_uint8 vx_lut(const uchar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_int8 vx_lut(const schar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_uint16 vx_lut(const ushort * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_int16 vx_lut(const short* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_int32 vx_lut(const int* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_uint32 vx_lut(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_float32 vx_lut(const float* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_int64 vx_lut(const int64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_uint64 vx_lut(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+#if CV_SIMD_64F
+    inline v_float64 vx_lut(const double* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+#endif
+    //! @}
+
+    //! @name Wide LUT of element pairs
+    //! @{
+    //! @brief Load maximum available capacity register contents with array element pairs by provided indexes
+    inline v_uint8 vx_lut_pairs(const uchar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_int8 vx_lut_pairs(const schar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_uint16 vx_lut_pairs(const ushort * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_int16 vx_lut_pairs(const short* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_int32 vx_lut_pairs(const int* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_uint32 vx_lut_pairs(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_float32 vx_lut_pairs(const float* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_int64 vx_lut_pairs(const int64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_uint64 vx_lut_pairs(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+#if CV_SIMD_64F
+    inline v_float64 vx_lut_pairs(const double* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+#endif
+    //! @}
+
+    //! @name Wide LUT of element quads
+    //! @{
+    //! @brief Load maximum available capacity register contents with array element quads by provided indexes
+    inline v_uint8 vx_lut_quads(const uchar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
+    inline v_int8 vx_lut_quads(const schar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
+    inline v_uint16 vx_lut_quads(const ushort* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
+    inline v_int16 vx_lut_quads(const short* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
+    inline v_int32 vx_lut_quads(const int* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
+    inline v_uint32 vx_lut_quads(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
+    inline v_float32 vx_lut_quads(const float* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
+    //! @}
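
The LUT loads are gathers: vx_lut pulls one element per index, while vx_lut_pairs and vx_lut_quads pull consecutive pairs or quads starting at each index (note the quad form above stops at 32-bit element types). A hedged sketch of a gather through an index table (remap_row is hypothetical; assumes CV_SIMD):

    // dst[i] = src[idx[i]], vectorized with vx_lut, scalar tail for the rest.
    void remap_row(float* dst, const float* src, const int* idx, int n)
    {
        int i = 0;
        for (; i <= n - v_float32::nlanes; i += v_float32::nlanes)
            v_store(dst + i, vx_lut(src, idx + i));
        for (; i < n; i++)
            dst[i] = src[idx[i]];
    }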
+    //! @name Wide load with double expansion
+    //! @{
+    //! @brief Load maximum available capacity register contents from memory with double expand
+    inline v_uint16 vx_load_expand(const uchar * ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_int16 vx_load_expand(const schar * ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_uint32 vx_load_expand(const ushort * ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_int32 vx_load_expand(const short* ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_int64 vx_load_expand(const int* ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_uint64 vx_load_expand(const unsigned* ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_float32 vx_load_expand(const float16_t * ptr) { return VXPREFIX(_load_expand)(ptr); }
+    //! @}
+
+    //! @name Wide load with quad expansion
+    //! @{
+    //! @brief Load maximum available capacity register contents from memory with quad expand
+    inline v_uint32 vx_load_expand_q(const uchar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
+    inline v_int32 vx_load_expand_q(const schar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
+    //! @}
+
+    /** @brief SIMD processing state cleanup call */
+    inline void vx_cleanup() { VXPREFIX(_cleanup)(); }
+
+//! @cond IGNORED
+    // backward compatibility
+    template<typename _Tp, typename _Tvec> static inline
+    void vx_store(_Tp* dst, const _Tvec& v) { return v_store(dst, v); }
+    // backward compatibility
+    template<typename _Tp, typename _Tvec> static inline
+    void vx_store_aligned(_Tp* dst, const _Tvec& v) { return v_store_aligned(dst, v); }
+//! @endcond
+
+//! @}
+    #undef VXPREFIX
+} // namespace
+
+//! @cond IGNORED
 #ifndef CV_SIMD_64F
 #define CV_SIMD_64F 0
 #endif
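
Putting the documented API together, an end-to-end hedged sketch (row_sum is hypothetical; assumes a build with CV_SIMD nonzero): the widening loads let 8-bit data be accumulated without overflow.

    // vx_load_expand widens uchar lanes to ushort on load; v_expand widens
    // again to unsigned, which v_reduce_sum folds to a scalar.
    unsigned row_sum(const uchar* row, int n)
    {
        unsigned total = 0;
        int i = 0;
        for (; i <= n - v_uint16::nlanes; i += v_uint16::nlanes)
        {
            v_uint16 w = vx_load_expand(row + i);
            v_uint32 lo, hi;
            v_expand(w, lo, hi);
            total += v_reduce_sum(lo) + v_reduce_sum(hi);
        }
        vx_cleanup(); // per the cleanup call documented above
        for (; i < n; i++)
            total += row[i];
        return total;
    }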