core: reimplement SIMD arithmetic, logic and comparison operations into wide universal intrinsics

- initialize arithmetic dispatcher
  - add new universal intrinsic v_absdiffs
  - add new universal intrinsic v_pack_b
  - add a two-input overload of universal intrinsic v_round (rounds and packs two double vectors into one int vector)
  - fix SSE/AVX2 uint8 multiplication overflow
  - reimplement arithmetic, logic and comparison operations into wide universal intrinsics
    with full support for all types (see the usage sketch below)
  - reimplement IPP arithmetic, logic and comparison operations in a separate file arithm_ipp.hpp
  - avoid scalar multiplication when the scaling factor equals 1 and use integer multiplication instead
  - move C arithmetic operations to precomp.hpp and delete [arithm_simd|arithm_core].hpp
  - add compatibility with the new OpenCV 4 divide policy
pull/12064/head
Sayed Adel 6 years ago
parent d61ad04f11
commit 93ffebc273
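A minimal usage sketch of the new intrinsics (illustrative, not part of the diff; assumes a CV_SIMD128-capable build, and CV_SIMD128_64F for the double lanes):

#include "opencv2/core/hal/intrin.hpp"
using namespace cv;

void demo()
{
    schar pa[16], pb[16], pd[16];
    for (int i = 0; i < 16; i++) { pa[i] = (schar)(i - 8); pb[i] = (schar)(8 - i); }
    v_int8x16 a = v_load(pa), b = v_load(pb);
    v_store(pd, v_absdiffs(a, b));                 // saturating |a - b|, no wrap-around

    // v_pack_b: pack two 16-bit boolean masks into one 8-bit mask vector
    v_uint16x8 m = v_reinterpret_as_u16(a == b);
    uchar pm[16];
    v_store(pm, v_pack_b(m, m));

    // two-input v_round: round two double vectors into one int vector
    double pf[4] = { 1.5, 2.5, -0.5, 3.3 };
    int pi[4];
    v_store(pi, v_round(v_load(pf), v_load(pf + 2)));  // {2, 2, 0, 3}
}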
Changed files, with the number of changed lines in parentheses:
  1. modules/core/CMakeLists.txt (1)
  2. modules/core/include/opencv2/core/hal/intrin_avx.hpp (52)
  3. modules/core/include/opencv2/core/hal/intrin_cpp.hpp (128)
  4. modules/core/include/opencv2/core/hal/intrin_neon.hpp (41)
  5. modules/core/include/opencv2/core/hal/intrin_sse.hpp (94)
  6. modules/core/include/opencv2/core/hal/intrin_vsx.hpp (57)
  7. modules/core/src/arithm.cpp (1012)
  8. modules/core/src/arithm.dispatch.cpp (11)
  9. modules/core/src/arithm.simd.hpp (1937)
  10. modules/core/src/arithm_core.hpp (623)
  11. modules/core/src/arithm_ipp.hpp (417)
  12. modules/core/src/arithm_simd.hpp (2025)
  13. modules/core/src/precomp.hpp (97)
  14. modules/core/test/test_intrin_utils.hpp (103)

@@ -2,6 +2,7 @@ set(the_description "The Core Functionality")
ocv_add_dispatched_file(mathfuncs_core SSE2 AVX AVX2)
ocv_add_dispatched_file(stat SSE4_2 AVX2)
ocv_add_dispatched_file(arithm SSE2 SSE4_1 AVX2)
# dispatching for accuracy tests
ocv_add_dispatched_file_force_all(test_intrin128 TEST SSE2 SSE3 SSSE3 SSE4_1 SSE4_2 AVX FP16 AVX2)
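# Note (sketch of OpenCV's dispatch machinery, not part of the diff): the arithm line
# above makes the build compile arithm.simd.hpp once per listed ISA, producing
# arithm.sse2.cpp, arithm.sse4_1.cpp and arithm.avx2.cpp with the matching compiler
# flags, plus a generated arithm.simd_declarations.hpp consumed by arithm.dispatch.cpp.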

@@ -661,7 +661,7 @@ inline v_uint8x32 operator * (const v_uint8x32& a, const v_uint8x32& b)
{
v_uint16x16 c, d;
v_mul_expand(a, b, c, d);
-return v_pack_u(v_reinterpret_as_s16(c), v_reinterpret_as_s16(d));
+return v_pack(c, d);
}
inline v_int8x32 operator * (const v_int8x32& a, const v_int8x32& b)
{
@@ -1291,6 +1291,16 @@ inline v_float32x8 v_absdiff(const v_float32x8& a, const v_float32x8& b)
inline v_float64x4 v_absdiff(const v_float64x4& a, const v_float64x4& b)
{ return v_abs(a - b); }
/** Saturating absolute difference **/
inline v_int8x32 v_absdiffs(const v_int8x32& a, const v_int8x32& b)
{
v_int8x32 d = a - b;
v_int8x32 m = a < b;
return (d ^ m) - m;
}
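// Note (explanatory, assuming this header's operators): branchless |a - b|. In lanes
// where a < b, m is all-ones (-1), so d ^ m = ~d and subtracting m adds 1, i.e. -d in
// two's complement. operator- on 8-bit AVX vectors maps to saturating _mm256_subs_epi8
// here, which is what keeps the result saturated (hence "absdiffs").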
inline v_int16x16 v_absdiffs(const v_int16x16& a, const v_int16x16& b)
{ return v_max(a, b) - v_min(a, b); }
////////// Conversions /////////
/** Rounding **/
@@ -1300,6 +1310,12 @@ inline v_int32x8 v_round(const v_float32x8& a)
inline v_int32x8 v_round(const v_float64x4& a)
{ return v_int32x8(_mm256_castsi128_si256(_mm256_cvtpd_epi32(a.val))); }
inline v_int32x8 v_round(const v_float64x4& a, const v_float64x4& b)
{
__m128i ai = _mm256_cvtpd_epi32(a.val), bi = _mm256_cvtpd_epi32(b.val);
return v_int32x8(_v256_combine(ai, bi));
}
inline v_int32x8 v_trunc(const v_float32x8& a)
{ return v_int32x8(_mm256_cvttps_epi32(a.val)); }
@@ -1689,6 +1705,40 @@ void v_rshr_pack_store(int* ptr, const v_int64x4& a)
v_pack_store(ptr, (a + delta) >> n);
}
// pack boolean
inline v_uint8x32 v_pack_b(const v_uint16x16& a, const v_uint16x16& b)
{
__m256i ab = _mm256_packs_epi16(a.val, b.val);
return v_uint8x32(_v256_shuffle_odd_64(ab));
}
inline v_uint8x32 v_pack_b(const v_uint32x8& a, const v_uint32x8& b,
const v_uint32x8& c, const v_uint32x8& d)
{
__m256i ab = _mm256_packs_epi32(a.val, b.val);
__m256i cd = _mm256_packs_epi32(c.val, d.val);
__m256i abcd = _v256_shuffle_odd_64(_mm256_packs_epi16(ab, cd));
return v_uint8x32(_mm256_shuffle_epi32(abcd, _MM_SHUFFLE(3, 1, 2, 0)));
}
inline v_uint8x32 v_pack_b(const v_uint64x4& a, const v_uint64x4& b, const v_uint64x4& c,
const v_uint64x4& d, const v_uint64x4& e, const v_uint64x4& f,
const v_uint64x4& g, const v_uint64x4& h)
{
__m256i ab = _mm256_packs_epi32(a.val, b.val);
__m256i cd = _mm256_packs_epi32(c.val, d.val);
__m256i ef = _mm256_packs_epi32(e.val, f.val);
__m256i gh = _mm256_packs_epi32(g.val, h.val);
__m256i abcd = _mm256_packs_epi32(ab, cd);
__m256i efgh = _mm256_packs_epi32(ef, gh);
__m256i pkall = _v256_shuffle_odd_64(_mm256_packs_epi16(abcd, efgh));
__m256i rev = _mm256_alignr_epi8(pkall, pkall, 8);
return v_uint8x32(_mm256_unpacklo_epi16(pkall, rev));
}
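// Note (explanatory): the AVX2 pack instructions operate independently within each
// 128-bit lane, so packed bytes come out interleaved per lane; _v256_shuffle_odd_64
// plus the alignr/unpacklo step above restore the natural cross-lane element order.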
/* Recombine */
// it's up there with the load and store operations

@@ -109,7 +109,7 @@ These operations allow to reorder or recombine elements in one or multiple vectors.
- Interleave, deinterleave (2, 3 and 4 channels): @ref v_load_deinterleave, @ref v_store_interleave
- Expand: @ref v_load_expand, @ref v_load_expand_q, @ref v_expand, @ref v_expand_low, @ref v_expand_high
-- Pack: @ref v_pack, @ref v_pack_u, @ref v_rshr_pack, @ref v_rshr_pack_u,
+- Pack: @ref v_pack, @ref v_pack_u, @ref v_pack_b, @ref v_rshr_pack, @ref v_rshr_pack_u,
@ref v_pack_store, @ref v_pack_u_store, @ref v_rshr_pack_store, @ref v_rshr_pack_u_store
- Recombine: @ref v_zip, @ref v_recombine, @ref v_combine_low, @ref v_combine_high
- Extract: @ref v_extract
@@ -159,7 +159,7 @@ Most of these operations return only one value.
### Other math
- Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude
-- Absolute values: @ref v_abs, @ref v_absdiff
+- Absolute values: @ref v_abs, @ref v_absdiff, @ref v_absdiffs
### Conversions
@@ -199,10 +199,12 @@ Regular integers:
|logical | x | x | x | x | x | x |
|min, max | x | x | x | x | x | x |
|absdiff | x | x | x | x | x | x |
+|absdiffs | | x | | x | | |
|reduce | | | | | x | x |
|mask | x | x | x | x | x | x |
|pack | x | x | x | x | x | x |
|pack_u | x | | x | | | |
+|pack_b | x | | | | | |
|unpack | x | x | x | x | x | x |
|extract | x | x | x | x | x | x |
|rotate (lanes) | x | x | x | x | x | x |
@@ -762,6 +764,19 @@ inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b)
return c;
}
/** @brief Saturating absolute difference
Returns \f$ saturate(|a - b|) \f$ .
For 8-, 16-bit signed integer source types. */
template<typename _Tp, int n>
inline v_reg<_Tp, n> v_absdiffs(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
{
v_reg<_Tp, n> c;
for( int i = 0; i < n; i++)
c.s[i] = saturate_cast<_Tp>(std::abs(a.s[i] - b.s[i]));
return c;
}
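// illustrative: for int8, a = 127, b = -128 gives |a - b| = 255 (computed as int),
// which saturate_cast clamps to 127 instead of wrapping.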
/** @brief Inverse square root
Returns \f$ 1/sqrt(a) \f$
@@ -1613,6 +1628,18 @@ template<int n> inline v_reg<int, n> v_round(const v_reg<float, n>& a)
return c;
}
/** @overload */
template<int n> inline v_reg<int, n*2> v_round(const v_reg<double, n>& a, const v_reg<double, n>& b)
{
v_reg<int, n*2> c;
for( int i = 0; i < n; i++ )
{
c.s[i] = cvRound(a.s[i]);
c.s[i+n] = cvRound(b.s[i]);
}
return c;
}
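// illustrative: a = {1.5, 2.5}, b = {-0.5, 3.3} yields c = {2, 2, 0, 3},
// since cvRound rounds halfway cases to the nearest even integer.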
/** @brief Floor
Floor each value. Input type is float vector ==> output type is int vector.*/
@@ -2059,6 +2086,103 @@ OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast)
//! @}
//! @cond IGNORED
template<typename _Tpm, typename _Tp, int n>
inline void _pack_b(_Tpm* mptr, const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
{
for (int i = 0; i < n; ++i)
{
mptr[i] = (_Tpm)a.s[i];
mptr[i + n] = (_Tpm)b.s[i];
}
}
//! @endcond
//! @name Pack boolean values
//! @{
//! @brief Pack boolean values from multiple vectors to one unsigned 8-bit integer vector
//!
//! @note Must provide valid boolean values to guarantee the same result across all architectures.
/** @brief
For 16-bit boolean values
Scheme:
@code
a {0xFFFF 0 0 0xFFFF 0 0xFFFF 0xFFFF 0}
b {0xFFFF 0 0xFFFF 0 0 0xFFFF 0 0xFFFF}
===============
{
0xFF 0 0 0xFF 0 0xFF 0xFF 0
0xFF 0 0xFF 0 0 0xFF 0 0xFF
}
@endcode */
inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
{
v_uint8x16 mask;
_pack_b(mask.s, a, b);
return mask;
}
/** @overload
For 32-bit boolean values
Scheme:
@code
a {0xFFFF.. 0 0 0xFFFF..}
b {0 0xFFFF.. 0xFFFF.. 0}
c {0xFFFF.. 0 0xFFFF.. 0}
d {0 0xFFFF.. 0 0xFFFF..}
===============
{
0xFF 0 0 0xFF 0 0xFF 0xFF 0
0xFF 0 0xFF 0 0 0xFF 0 0xFF
}
@endcode */
inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
const v_uint32x4& c, const v_uint32x4& d)
{
v_uint8x16 mask;
_pack_b(mask.s, a, b);
_pack_b(mask.s + 8, c, d);
return mask;
}
/** @overload
For 64-bit boolean values
Scheme:
@code
a {0xFFFF.. 0}
b {0 0xFFFF..}
c {0xFFFF.. 0}
d {0 0xFFFF..}
e {0xFFFF.. 0}
f {0xFFFF.. 0}
g {0 0xFFFF..}
h {0 0xFFFF..}
===============
{
0xFF 0 0 0xFF 0xFF 0 0 0xFF
0xFF 0 0xFF 0 0 0xFF 0 0xFF
}
@endcode */
inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
const v_uint64x2& g, const v_uint64x2& h)
{
v_uint8x16 mask;
_pack_b(mask.s, a, b);
_pack_b(mask.s + 4, c, d);
_pack_b(mask.s + 8, e, f);
_pack_b(mask.s + 12, g, h);
return mask;
}
//! @}
/** @brief Matrix multiplication
Scheme:

@@ -394,6 +394,35 @@ OPENCV_HAL_IMPL_NEON_PACK(v_int32x4, int, int32x2_t, s32, v_int64x2, pack, vmovn
OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_int16x8, pack_u, vqmovun_s16, vqrshrun_n_s16)
OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_int32x4, pack_u, vqmovun_s32, vqrshrun_n_s32)
// pack boolean
inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
{
uint8x16_t ab = vcombine_u8(vmovn_u16(a.val), vmovn_u16(b.val));
return v_uint8x16(ab);
}
inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
const v_uint32x4& c, const v_uint32x4& d)
{
uint16x8_t nab = vcombine_u16(vmovn_u32(a.val), vmovn_u32(b.val));
uint16x8_t ncd = vcombine_u16(vmovn_u32(c.val), vmovn_u32(d.val));
return v_uint8x16(vcombine_u8(vmovn_u16(nab), vmovn_u16(ncd)));
}
inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
const v_uint64x2& g, const v_uint64x2& h)
{
uint32x4_t ab = vcombine_u32(vmovn_u64(a.val), vmovn_u64(b.val));
uint32x4_t cd = vcombine_u32(vmovn_u64(c.val), vmovn_u64(d.val));
uint32x4_t ef = vcombine_u32(vmovn_u64(e.val), vmovn_u64(f.val));
uint32x4_t gh = vcombine_u32(vmovn_u64(g.val), vmovn_u64(h.val));
uint16x8_t abcd = vcombine_u16(vmovn_u32(ab), vmovn_u32(cd));
uint16x8_t efgh = vcombine_u16(vmovn_u32(ef), vmovn_u32(gh));
return v_uint8x16(vcombine_u8(vmovn_u16(abcd), vmovn_u16(efgh)));
}
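// Note (explanatory): the vmovn_* narrowing moves truncate rather than saturate; that
// is safe here because valid boolean lanes are all-zeros or all-ones, which truncate
// to 0x00/0xFF as required.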
inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
const v_float32x4& m1, const v_float32x4& m2,
const v_float32x4& m3)
@@ -748,7 +777,6 @@ OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_mul_wrap, vmulq_s8)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_mul_wrap, vmulq_u16)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_mul_wrap, vmulq_s16)
-// TODO: absdiff for signed integers
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_absdiff, vabdq_u8)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_absdiff, vabdq_u16)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_absdiff, vabdq_u32)
@@ -757,6 +785,12 @@ OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_absdiff, vabdq_f32)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_absdiff, vabdq_f64)
#endif
/** Saturating absolute difference **/
inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b)
{ return v_int8x16(vqabsq_s8(vqsubq_s8(a.val, b.val))); }
inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b)
{ return v_int16x8(vqabsq_s16(vqsubq_s16(a.val, b.val))); }
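// Note (explanatory): both vqsubq and vqabsq saturate, so e.g. a = 127, b = -128
// gives 127 (vqsubq_s8 clamps 255 to 127) instead of a wrapped result; vqabsq also
// maps INT8_MIN to 127 rather than overflowing.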
#define OPENCV_HAL_IMPL_NEON_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin) \
inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \
{ \
@@ -1242,6 +1276,11 @@ inline v_int32x4 v_round(const v_float64x2& a)
return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), zero));
}
inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b)
{
return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), vmovn_s64(vcvtaq_s64_f64(b.val))));
}
inline v_int32x4 v_floor(const v_float64x2& a)
{
static const int32x2_t zero = vdup_n_s32(0);

@@ -634,6 +634,35 @@ void v_rshr_pack_store(int* ptr, const v_int64x2& a)
_mm_storel_epi64((__m128i*)ptr, a2);
}
// pack boolean
inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
{
__m128i ab = _mm_packs_epi16(a.val, b.val);
return v_uint8x16(ab);
}
inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
const v_uint32x4& c, const v_uint32x4& d)
{
__m128i ab = _mm_packs_epi32(a.val, b.val);
__m128i cd = _mm_packs_epi32(c.val, d.val);
return v_uint8x16(_mm_packs_epi16(ab, cd));
}
inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
const v_uint64x2& g, const v_uint64x2& h)
{
__m128i ab = _mm_packs_epi32(a.val, b.val);
__m128i cd = _mm_packs_epi32(c.val, d.val);
__m128i ef = _mm_packs_epi32(e.val, f.val);
__m128i gh = _mm_packs_epi32(g.val, h.val);
__m128i abcd = _mm_packs_epi32(ab, cd);
__m128i efgh = _mm_packs_epi32(ef, gh);
return v_uint8x16(_mm_packs_epi16(abcd, efgh));
}
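// Note (explanatory): the signed saturating packs are safe on boolean inputs because
// valid mask lanes are 0 or all-ones (-1); _mm_packs_epi32/_mm_packs_epi16 keep -1
// as -1, i.e. 0xFF per output byte, matching the @note in intrin_cpp.hpp.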
inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
const v_float32x4& m1, const v_float32x4& m2,
const v_float32x4& m3)
@@ -706,19 +735,11 @@ OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int64x2, _mm_sub_epi64)
inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \
{ a = a * b; return a; }
+OPENCV_HAL_IMPL_SSE_MUL_SAT(v_uint8x16, v_uint16x8)
OPENCV_HAL_IMPL_SSE_MUL_SAT(v_int8x16, v_int16x8)
OPENCV_HAL_IMPL_SSE_MUL_SAT(v_uint16x8, v_uint32x4)
OPENCV_HAL_IMPL_SSE_MUL_SAT(v_int16x8, v_int32x4)
-inline v_uint8x16 operator * (const v_uint8x16& a, const v_uint8x16& b)
-{
-v_uint16x8 c, d;
-v_mul_expand(a, b, c, d);
-return v_pack_u(v_reinterpret_as_s16(c), v_reinterpret_as_s16(d));
-}
-inline v_uint8x16& operator *= (v_uint8x16& a, const v_uint8x16& b)
-{ a = a * b; return a; }
// Multiply and expand
inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b,
v_uint16x8& c, v_uint16x8& d)
@@ -1045,34 +1066,43 @@ inline v_int8x16 v_mul_wrap(const v_int8x16& a, const v_int8x16& b)
return v_reinterpret_as_s8(v_mul_wrap(v_reinterpret_as_u8(a), v_reinterpret_as_u8(b)));
}
#define OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16(_Tpuvec, _Tpsvec, bits, smask32) \
inline _Tpuvec v_absdiff(const _Tpuvec& a, const _Tpuvec& b) \
{ \
return _Tpuvec(_mm_add_epi##bits(_mm_subs_epu##bits(a.val, b.val), _mm_subs_epu##bits(b.val, a.val))); \
} \
inline _Tpuvec v_absdiff(const _Tpsvec& a, const _Tpsvec& b) \
{ \
__m128i smask = _mm_set1_epi32(smask32); \
__m128i a1 = _mm_xor_si128(a.val, smask); \
__m128i b1 = _mm_xor_si128(b.val, smask); \
return _Tpuvec(_mm_add_epi##bits(_mm_subs_epu##bits(a1, b1), _mm_subs_epu##bits(b1, a1))); \
}
OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16(v_uint8x16, v_int8x16, 8, (int)0x80808080)
OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16(v_uint16x8, v_int16x8, 16, (int)0x80008000)
/** Absolute difference **/
inline v_uint8x16 v_absdiff(const v_uint8x16& a, const v_uint8x16& b)
{ return v_add_wrap(a - b, b - a); }
inline v_uint16x8 v_absdiff(const v_uint16x8& a, const v_uint16x8& b)
{ return v_add_wrap(a - b, b - a); }
inline v_uint32x4 v_absdiff(const v_uint32x4& a, const v_uint32x4& b)
{ return v_max(a, b) - v_min(a, b); }
inline v_uint8x16 v_absdiff(const v_int8x16& a, const v_int8x16& b)
{
-return v_max(a, b) - v_min(a, b);
+v_int8x16 d = v_sub_wrap(a, b);
+v_int8x16 m = a < b;
+return v_reinterpret_as_u8(v_sub_wrap(d ^ m, m));
}
inline v_uint16x8 v_absdiff(const v_int16x8& a, const v_int16x8& b)
{
return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b)));
}
inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b)
{
-__m128i d = _mm_sub_epi32(a.val, b.val);
-__m128i m = _mm_cmpgt_epi32(b.val, a.val);
-return v_uint32x4(_mm_sub_epi32(_mm_xor_si128(d, m), m));
+v_int32x4 d = a - b;
+v_int32x4 m = a < b;
+return v_reinterpret_as_u32((d ^ m) - m);
}
/** Saturating absolute difference **/
inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b)
{
v_int8x16 d = a - b;
v_int8x16 m = a < b;
return (d ^ m) - m;
}
inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b)
{ return v_max(a, b) - v_min(a, b); }
inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
{
return a * b + c;
@@ -1623,6 +1653,12 @@ inline v_int32x4 v_trunc(const v_float32x4& a)
inline v_int32x4 v_round(const v_float64x2& a)
{ return v_int32x4(_mm_cvtpd_epi32(a.val)); }
inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b)
{
__m128i ai = _mm_cvtpd_epi32(a.val), bi = _mm_cvtpd_epi32(b.val);
return v_int32x4(_mm_unpacklo_epi64(ai, bi));
}
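// Note (explanatory): _mm_cvtpd_epi32 leaves its two int32 results in the low 64 bits
// (high half zeroed), so _mm_unpacklo_epi64 concatenates the two low halves into a
// full v_int32x4.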
inline v_int32x4 v_floor(const v_float64x2& a)
{
__m128i a1 = _mm_cvtpd_epi32(a.val);

@@ -383,6 +383,35 @@ OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_int32x4, unsigned int, int,
//OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_int64x2, unsigned long long, long long,
// vec_sra, vec_packsu, vec_add, pack_u)
// pack boolean
inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
{
vec_uchar16 ab = vec_pack(a.val, b.val);
return v_uint8x16(ab);
}
inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
const v_uint32x4& c, const v_uint32x4& d)
{
vec_ushort8 ab = vec_pack(a.val, b.val);
vec_ushort8 cd = vec_pack(c.val, d.val);
return v_uint8x16(vec_pack(ab, cd));
}
inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
const v_uint64x2& g, const v_uint64x2& h)
{
vec_uint4 ab = vec_pack(a.val, b.val);
vec_uint4 cd = vec_pack(c.val, d.val);
vec_uint4 ef = vec_pack(e.val, f.val);
vec_uint4 gh = vec_pack(g.val, h.val);
vec_ushort8 abcd = vec_pack(ab, cd);
vec_ushort8 efgh = vec_pack(ef, gh);
return v_uint8x16(vec_pack(abcd, efgh));
}
/* Recombine */
template <typename _Tpvec>
inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1)
@@ -834,16 +863,27 @@ inline v_float32x4 v_abs(const v_float32x4& x)
inline v_float64x2 v_abs(const v_float64x2& x)
{ return v_float64x2(vec_abs(x.val)); }
/** Absolute difference **/
// unsigned
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_absdiff, vec_absd)
#define OPENCV_HAL_IMPL_VSX_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin) \
inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec2(cast(intrin(a.val, b.val))); }
-inline v_uint8x16 v_absdiff(const v_int8x16& a, const v_int8x16& b)
-{ return v_reinterpret_as_u8(v_sub_wrap(v_max(a, b), v_min(a, b))); }
-inline v_uint16x8 v_absdiff(const v_int16x8& a, const v_int16x8& b)
-{ return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))); }
-inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b)
-{ return v_reinterpret_as_u32(v_max(a, b) - v_min(a, b)); }
+OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int8x16, v_uint8x16, vec_uchar16_c, v_absdiff, vec_absd)
+OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int16x8, v_uint16x8, vec_ushort8_c, v_absdiff, vec_absd)
+OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int32x4, v_uint32x4, vec_uint4_c, v_absdiff, vec_absd)
+OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int64x2, v_uint64x2, vec_udword2_c, v_absdiff, vec_absd)
inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b)
{ return v_abs(a - b); }
inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b)
{ return v_abs(a - b); }
/** Saturating absolute difference **/
inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b)
{ return v_int8x16(vec_abss(vec_subs(a.val, b.val))); }
inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b)
{ return v_int16x8(vec_abss(vec_subs(a.val, b.val))); }
////////// Conversions /////////
@@ -854,6 +894,9 @@ inline v_int32x4 v_round(const v_float32x4& a)
inline v_int32x4 v_round(const v_float64x2& a)
{ return v_int32x4(vec_mergesqo(vec_ctso(vec_rint(a.val)), vec_int4_z)); }
inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b)
{ return v_int32x4(vec_mergesqo(vec_ctso(vec_rint(a.val)), vec_ctso(vec_rint(b.val)))); }
inline v_int32x4 v_floor(const v_float32x4& a)
{ return v_int32x4(vec_cts(vec_floor(a.val))); }

modules/core/src/arithm.cpp: file diff suppressed because it is too large.

@@ -0,0 +1,11 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
#include "precomp.hpp"
#include "arithm_ipp.hpp"
#include "arithm.simd.hpp"
#include "arithm.simd_declarations.hpp"
#define ARITHM_DISPATCHING_ONLY
#include "arithm.simd.hpp"

modules/core/src/arithm.simd.hpp: file diff suppressed because it is too large.

@@ -1,623 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_ARITHM_CORE_HPP__
#define __OPENCV_ARITHM_CORE_HPP__
#include "arithm_simd.hpp"
namespace cv {
template<typename T1, typename T2=T1, typename T3=T1> struct OpAdd
{
typedef T1 type1;
typedef T2 type2;
typedef T3 rtype;
T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(a + b); }
};
template<typename T1, typename T2=T1, typename T3=T1> struct OpSub
{
typedef T1 type1;
typedef T2 type2;
typedef T3 rtype;
T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(a - b); }
};
template<typename T1, typename T2=T1, typename T3=T1> struct OpRSub
{
typedef T1 type1;
typedef T2 type2;
typedef T3 rtype;
T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(b - a); }
};
template<typename T> struct OpMin
{
typedef T type1;
typedef T type2;
typedef T rtype;
T operator ()(const T a, const T b) const { return std::min(a, b); }
};
template<typename T> struct OpMax
{
typedef T type1;
typedef T type2;
typedef T rtype;
T operator ()(const T a, const T b) const { return std::max(a, b); }
};
template<typename T> struct OpAbsDiff
{
typedef T type1;
typedef T type2;
typedef T rtype;
T operator()(T a, T b) const { return a > b ? a - b : b - a; }
};
// specializations to prevent "-0" results
template<> struct OpAbsDiff<float>
{
typedef float type1;
typedef float type2;
typedef float rtype;
float operator()(float a, float b) const { return std::abs(a - b); }
};
template<> struct OpAbsDiff<double>
{
typedef double type1;
typedef double type2;
typedef double rtype;
double operator()(double a, double b) const { return std::abs(a - b); }
};
template<typename T> struct OpAnd
{
typedef T type1;
typedef T type2;
typedef T rtype;
T operator()( T a, T b ) const { return a & b; }
};
template<typename T> struct OpOr
{
typedef T type1;
typedef T type2;
typedef T rtype;
T operator()( T a, T b ) const { return a | b; }
};
template<typename T> struct OpXor
{
typedef T type1;
typedef T type2;
typedef T rtype;
T operator()( T a, T b ) const { return a ^ b; }
};
template<typename T> struct OpNot
{
typedef T type1;
typedef T type2;
typedef T rtype;
T operator()( T a, T ) const { return ~a; }
};
//=============================================================================
template<typename T, class Op, class VOp>
void vBinOp(const T* src1, size_t step1, const T* src2, size_t step2, T* dst, size_t step, int width, int height)
{
#if CV_SSE2 || CV_NEON
VOp vop;
#endif
Op op;
for( ; height--; src1 = (const T *)((const uchar *)src1 + step1),
src2 = (const T *)((const uchar *)src2 + step2),
dst = (T *)((uchar *)dst + step) )
{
int x = 0;
#if CV_NEON || CV_SSE2
#if CV_AVX2
if( USE_AVX2 )
{
for( ; x <= width - 32/(int)sizeof(T); x += 32/sizeof(T) )
{
typename VLoadStore256<T>::reg_type r0 = VLoadStore256<T>::load(src1 + x);
r0 = vop(r0, VLoadStore256<T>::load(src2 + x));
VLoadStore256<T>::store(dst + x, r0);
}
}
#else
#if CV_SSE2
if( USE_SSE2 )
{
#endif // CV_SSE2
for( ; x <= width - 32/(int)sizeof(T); x += 32/sizeof(T) )
{
typename VLoadStore128<T>::reg_type r0 = VLoadStore128<T>::load(src1 + x );
typename VLoadStore128<T>::reg_type r1 = VLoadStore128<T>::load(src1 + x + 16/sizeof(T));
r0 = vop(r0, VLoadStore128<T>::load(src2 + x ));
r1 = vop(r1, VLoadStore128<T>::load(src2 + x + 16/sizeof(T)));
VLoadStore128<T>::store(dst + x , r0);
VLoadStore128<T>::store(dst + x + 16/sizeof(T), r1);
}
#if CV_SSE2
}
#endif // CV_SSE2
#endif // CV_AVX2
#endif // CV_NEON || CV_SSE2
#if CV_AVX2
// nothing
#elif CV_SSE2
if( USE_SSE2 )
{
for( ; x <= width - 8/(int)sizeof(T); x += 8/sizeof(T) )
{
typename VLoadStore64<T>::reg_type r = VLoadStore64<T>::load(src1 + x);
r = vop(r, VLoadStore64<T>::load(src2 + x));
VLoadStore64<T>::store(dst + x, r);
}
}
#endif
#if CV_ENABLE_UNROLLED
for( ; x <= width - 4; x += 4 )
{
T v0 = op(src1[x], src2[x]);
T v1 = op(src1[x+1], src2[x+1]);
dst[x] = v0; dst[x+1] = v1;
v0 = op(src1[x+2], src2[x+2]);
v1 = op(src1[x+3], src2[x+3]);
dst[x+2] = v0; dst[x+3] = v1;
}
#endif
for( ; x < width; x++ )
dst[x] = op(src1[x], src2[x]);
}
}
template<typename T, class Op, class Op32>
void vBinOp32(const T* src1, size_t step1, const T* src2, size_t step2,
T* dst, size_t step, int width, int height)
{
#if CV_SSE2 || CV_NEON
Op32 op32;
#endif
Op op;
for( ; height--; src1 = (const T *)((const uchar *)src1 + step1),
src2 = (const T *)((const uchar *)src2 + step2),
dst = (T *)((uchar *)dst + step) )
{
int x = 0;
#if CV_AVX2
if( USE_AVX2 )
{
if( (((size_t)src1|(size_t)src2|(size_t)dst)&31) == 0 )
{
for( ; x <= width - 8; x += 8 )
{
typename VLoadStore256Aligned<T>::reg_type r0 = VLoadStore256Aligned<T>::load(src1 + x);
r0 = op32(r0, VLoadStore256Aligned<T>::load(src2 + x));
VLoadStore256Aligned<T>::store(dst + x, r0);
}
}
}
#elif CV_SSE2
if( USE_SSE2 )
{
if( (((size_t)src1|(size_t)src2|(size_t)dst)&15) == 0 )
{
for( ; x <= width - 8; x += 8 )
{
typename VLoadStore128Aligned<T>::reg_type r0 = VLoadStore128Aligned<T>::load(src1 + x );
typename VLoadStore128Aligned<T>::reg_type r1 = VLoadStore128Aligned<T>::load(src1 + x + 4);
r0 = op32(r0, VLoadStore128Aligned<T>::load(src2 + x ));
r1 = op32(r1, VLoadStore128Aligned<T>::load(src2 + x + 4));
VLoadStore128Aligned<T>::store(dst + x , r0);
VLoadStore128Aligned<T>::store(dst + x + 4, r1);
}
}
}
#endif // CV_AVX2
#if CV_NEON || CV_SSE2
#if CV_AVX2
if( USE_AVX2 )
{
for( ; x <= width - 8; x += 8 )
{
typename VLoadStore256<T>::reg_type r0 = VLoadStore256<T>::load(src1 + x);
r0 = op32(r0, VLoadStore256<T>::load(src2 + x));
VLoadStore256<T>::store(dst + x, r0);
}
}
#else
#if CV_SSE2
if( USE_SSE2 )
{
#endif // CV_SSE2
for( ; x <= width - 8; x += 8 )
{
typename VLoadStore128<T>::reg_type r0 = VLoadStore128<T>::load(src1 + x );
typename VLoadStore128<T>::reg_type r1 = VLoadStore128<T>::load(src1 + x + 4);
r0 = op32(r0, VLoadStore128<T>::load(src2 + x ));
r1 = op32(r1, VLoadStore128<T>::load(src2 + x + 4));
VLoadStore128<T>::store(dst + x , r0);
VLoadStore128<T>::store(dst + x + 4, r1);
}
#if CV_SSE2
}
#endif // CV_SSE2
#endif // CV_AVX2
#endif // CV_NEON || CV_SSE2
#if CV_ENABLE_UNROLLED
for( ; x <= width - 4; x += 4 )
{
T v0 = op(src1[x], src2[x]);
T v1 = op(src1[x+1], src2[x+1]);
dst[x] = v0; dst[x+1] = v1;
v0 = op(src1[x+2], src2[x+2]);
v1 = op(src1[x+3], src2[x+3]);
dst[x+2] = v0; dst[x+3] = v1;
}
#endif
for( ; x < width; x++ )
dst[x] = op(src1[x], src2[x]);
}
}
template<typename T, class Op, class Op64>
void vBinOp64(const T* src1, size_t step1, const T* src2, size_t step2,
T* dst, size_t step, int width, int height)
{
#if CV_SSE2
Op64 op64;
#endif
Op op;
for( ; height--; src1 = (const T *)((const uchar *)src1 + step1),
src2 = (const T *)((const uchar *)src2 + step2),
dst = (T *)((uchar *)dst + step) )
{
int x = 0;
#if CV_AVX2
if( USE_AVX2 )
{
if( (((size_t)src1|(size_t)src2|(size_t)dst)&31) == 0 )
{
for( ; x <= width - 4; x += 4 )
{
typename VLoadStore256Aligned<T>::reg_type r0 = VLoadStore256Aligned<T>::load(src1 + x);
r0 = op64(r0, VLoadStore256Aligned<T>::load(src2 + x));
VLoadStore256Aligned<T>::store(dst + x, r0);
}
}
}
#elif CV_SSE2
if( USE_SSE2 )
{
if( (((size_t)src1|(size_t)src2|(size_t)dst)&15) == 0 )
{
for( ; x <= width - 4; x += 4 )
{
typename VLoadStore128Aligned<T>::reg_type r0 = VLoadStore128Aligned<T>::load(src1 + x );
typename VLoadStore128Aligned<T>::reg_type r1 = VLoadStore128Aligned<T>::load(src1 + x + 2);
r0 = op64(r0, VLoadStore128Aligned<T>::load(src2 + x ));
r1 = op64(r1, VLoadStore128Aligned<T>::load(src2 + x + 2));
VLoadStore128Aligned<T>::store(dst + x , r0);
VLoadStore128Aligned<T>::store(dst + x + 2, r1);
}
}
}
#endif
for( ; x <= width - 4; x += 4 )
{
T v0 = op(src1[x], src2[x]);
T v1 = op(src1[x+1], src2[x+1]);
dst[x] = v0; dst[x+1] = v1;
v0 = op(src1[x+2], src2[x+2]);
v1 = op(src1[x+3], src2[x+3]);
dst[x+2] = v0; dst[x+3] = v1;
}
for( ; x < width; x++ )
dst[x] = op(src1[x], src2[x]);
}
}
template<typename T> static void
cmp_(const T* src1, size_t step1, const T* src2, size_t step2,
uchar* dst, size_t step, int width, int height, int code)
{
step1 /= sizeof(src1[0]);
step2 /= sizeof(src2[0]);
if( code == CMP_GE || code == CMP_LT )
{
std::swap(src1, src2);
std::swap(step1, step2);
code = code == CMP_GE ? CMP_LE : CMP_GT;
}
Cmp_SIMD<T> vop(code);
if( code == CMP_GT || code == CMP_LE )
{
int m = code == CMP_GT ? 0 : 255;
for( ; height--; src1 += step1, src2 += step2, dst += step )
{
int x = vop(src1, src2, dst, width);
#if CV_ENABLE_UNROLLED
for( ; x <= width - 4; x += 4 )
{
int t0, t1;
t0 = -(src1[x] > src2[x]) ^ m;
t1 = -(src1[x+1] > src2[x+1]) ^ m;
dst[x] = (uchar)t0; dst[x+1] = (uchar)t1;
t0 = -(src1[x+2] > src2[x+2]) ^ m;
t1 = -(src1[x+3] > src2[x+3]) ^ m;
dst[x+2] = (uchar)t0; dst[x+3] = (uchar)t1;
}
#endif
for( ; x < width; x++ )
dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
}
}
else if( code == CMP_EQ || code == CMP_NE )
{
int m = code == CMP_EQ ? 0 : 255;
for( ; height--; src1 += step1, src2 += step2, dst += step )
{
int x = 0;
#if CV_ENABLE_UNROLLED
for( ; x <= width - 4; x += 4 )
{
int t0, t1;
t0 = -(src1[x] == src2[x]) ^ m;
t1 = -(src1[x+1] == src2[x+1]) ^ m;
dst[x] = (uchar)t0; dst[x+1] = (uchar)t1;
t0 = -(src1[x+2] == src2[x+2]) ^ m;
t1 = -(src1[x+3] == src2[x+3]) ^ m;
dst[x+2] = (uchar)t0; dst[x+3] = (uchar)t1;
}
#endif
for( ; x < width; x++ )
dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
}
}
}
template<typename T, typename WT> static void
mul_( const T* src1, size_t step1, const T* src2, size_t step2,
T* dst, size_t step, int width, int height, WT scale )
{
step1 /= sizeof(src1[0]);
step2 /= sizeof(src2[0]);
step /= sizeof(dst[0]);
Mul_SIMD<T, WT> vop;
if( scale == (WT)1. )
{
for( ; height--; src1 += step1, src2 += step2, dst += step )
{
int i = vop(src1, src2, dst, width, scale);
#if CV_ENABLE_UNROLLED
for(; i <= width - 4; i += 4 )
{
T t0;
T t1;
t0 = saturate_cast<T>(src1[i ] * src2[i ]);
t1 = saturate_cast<T>(src1[i+1] * src2[i+1]);
dst[i ] = t0;
dst[i+1] = t1;
t0 = saturate_cast<T>(src1[i+2] * src2[i+2]);
t1 = saturate_cast<T>(src1[i+3] * src2[i+3]);
dst[i+2] = t0;
dst[i+3] = t1;
}
#endif
for( ; i < width; i++ )
dst[i] = saturate_cast<T>(src1[i] * src2[i]);
}
}
else
{
for( ; height--; src1 += step1, src2 += step2, dst += step )
{
int i = vop(src1, src2, dst, width, scale);
#if CV_ENABLE_UNROLLED
for(; i <= width - 4; i += 4 )
{
T t0 = saturate_cast<T>(scale*(WT)src1[i]*src2[i]);
T t1 = saturate_cast<T>(scale*(WT)src1[i+1]*src2[i+1]);
dst[i] = t0; dst[i+1] = t1;
t0 = saturate_cast<T>(scale*(WT)src1[i+2]*src2[i+2]);
t1 = saturate_cast<T>(scale*(WT)src1[i+3]*src2[i+3]);
dst[i+2] = t0; dst[i+3] = t1;
}
#endif
for( ; i < width; i++ )
dst[i] = saturate_cast<T>(scale*(WT)src1[i]*src2[i]);
}
}
}
template<typename T> static void
div_i( const T* src1, size_t step1, const T* src2, size_t step2,
T* dst, size_t step, int width, int height, double scale )
{
step1 /= sizeof(src1[0]);
step2 /= sizeof(src2[0]);
step /= sizeof(dst[0]);
Div_SIMD<T> vop;
float scale_f = (float)scale;
for( ; height--; src1 += step1, src2 += step2, dst += step )
{
int i = vop(src1, src2, dst, width, scale);
for( ; i < width; i++ )
{
T num = src1[i], denom = src2[i];
dst[i] = denom != 0 ? saturate_cast<T>(num*scale_f/denom) : (T)0;
}
}
}
template<typename T> static void
div_f( const T* src1, size_t step1, const T* src2, size_t step2,
T* dst, size_t step, int width, int height, double scale )
{
T scale_f = (T)scale;
step1 /= sizeof(src1[0]);
step2 /= sizeof(src2[0]);
step /= sizeof(dst[0]);
Div_SIMD<T> vop;
for( ; height--; src1 += step1, src2 += step2, dst += step )
{
int i = vop(src1, src2, dst, width, scale);
for( ; i < width; i++ )
{
T num = src1[i], denom = src2[i];
dst[i] = denom != 0 ? saturate_cast<T>(num*scale_f/denom) : (T)0;
}
}
}
template<typename T> static void
recip_i( const T* src2, size_t step2,
T* dst, size_t step, int width, int height, double scale )
{
step2 /= sizeof(src2[0]);
step /= sizeof(dst[0]);
Recip_SIMD<T> vop;
float scale_f = (float)scale;
for( ; height--; src2 += step2, dst += step )
{
int i = vop(src2, dst, width, scale);
for( ; i < width; i++ )
{
T denom = src2[i];
dst[i] = denom != 0 ? saturate_cast<T>(scale_f/denom) : (T)0;
}
}
}
template<typename T> static void
recip_f( const T* src2, size_t step2,
T* dst, size_t step, int width, int height, double scale )
{
T scale_f = (T)scale;
step2 /= sizeof(src2[0]);
step /= sizeof(dst[0]);
Recip_SIMD<T> vop;
for( ; height--; src2 += step2, dst += step )
{
int i = vop(src2, dst, width, scale);
for( ; i < width; i++ )
{
T denom = src2[i];
dst[i] = denom != 0 ? saturate_cast<T>(scale_f/denom) : (T)0;
}
}
}
template<typename T, typename WT> static void
addWeighted_( const T* src1, size_t step1, const T* src2, size_t step2,
T* dst, size_t step, int width, int height, void* _scalars )
{
const double* scalars = (const double*)_scalars;
WT alpha = (WT)scalars[0], beta = (WT)scalars[1], gamma = (WT)scalars[2];
step1 /= sizeof(src1[0]);
step2 /= sizeof(src2[0]);
step /= sizeof(dst[0]);
AddWeighted_SIMD<T, WT> vop;
for( ; height--; src1 += step1, src2 += step2, dst += step )
{
int x = vop(src1, src2, dst, width, alpha, beta, gamma);
#if CV_ENABLE_UNROLLED
for( ; x <= width - 4; x += 4 )
{
T t0 = saturate_cast<T>(src1[x]*alpha + src2[x]*beta + gamma);
T t1 = saturate_cast<T>(src1[x+1]*alpha + src2[x+1]*beta + gamma);
dst[x] = t0; dst[x+1] = t1;
t0 = saturate_cast<T>(src1[x+2]*alpha + src2[x+2]*beta + gamma);
t1 = saturate_cast<T>(src1[x+3]*alpha + src2[x+3]*beta + gamma);
dst[x+2] = t0; dst[x+3] = t1;
}
#endif
for( ; x < width; x++ )
dst[x] = saturate_cast<T>(src1[x]*alpha + src2[x]*beta + gamma);
}
}
} // cv::
#endif // __OPENCV_ARITHM_CORE_HPP__

@@ -0,0 +1,417 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
#if ARITHM_USE_IPP
namespace cv { namespace hal {
//=======================================
// Arithmetic and logical operations
// +, -, *, /, &, |, ^, ~, abs ...
//=======================================
#define ARITHM_IPP_BIN(fun, ...) \
do { \
if (!CV_IPP_CHECK_COND) \
return 0; \
if (height == 1) \
step1 = step2 = step = width * sizeof(dst[0]); \
if (0 <= CV_INSTRUMENT_FUN_IPP(fun, __VA_ARGS__)) \
{ \
CV_IMPL_ADD(CV_IMPL_IPP); \
return 1; \
} \
setIppErrorStatus(); \
return 0; \
} while(0)
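// Contract (applies to the helpers below): return 1 when IPP handled the call
// (recorded via CV_IMPL_ADD), 0 to make the caller fall back to the universal
// intrinsics path. The height == 1 case synthesizes a contiguous step for
// single-row inputs, where the caller's step value may be arbitrary.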
//=======================================
// Addition
//=======================================
inline int arithm_ipp_add8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiAdd_8u_C1RSfs, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0);
}
inline int arithm_ipp_add16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
ushort* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiAdd_16u_C1RSfs, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0);
}
inline int arithm_ipp_add16s(const short* src1, size_t step1, const short* src2, size_t step2,
short* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiAdd_16s_C1RSfs, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0);
}
inline int arithm_ipp_add32f(const float* src1, size_t step1, const float* src2, size_t step2,
float* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiAdd_32f_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
#define arithm_ipp_add8s(...) 0
#define arithm_ipp_add32s(...) 0
#define arithm_ipp_add64f(...) 0
//=======================================
// Subtract
//=======================================
inline int arithm_ipp_sub8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiSub_8u_C1RSfs, src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0);
}
inline int arithm_ipp_sub16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
ushort* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiSub_16u_C1RSfs, src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0);
}
inline int arithm_ipp_sub16s(const short* src1, size_t step1, const short* src2, size_t step2,
short* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiSub_16s_C1RSfs, src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0);
}
inline int arithm_ipp_sub32f(const float* src1, size_t step1, const float* src2, size_t step2,
float* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiSub_32f_C1R, src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height));
}
#define arithm_ipp_sub8s(...) 0
#define arithm_ipp_sub32s(...) 0
#define arithm_ipp_sub64f(...) 0
///////////////////////////////////////////////////////////////////////////////////////////////////
#define ARITHM_IPP_MIN_MAX(fun, type) \
do { \
if (!CV_IPP_CHECK_COND) \
return 0; \
type* s1 = (type*)src1; \
type* s2 = (type*)src2; \
type* d = dst; \
if (height == 1) \
step1 = step2 = step = width * sizeof(dst[0]); \
int i = 0; \
for(; i < height; i++) \
{ \
if (0 > CV_INSTRUMENT_FUN_IPP(fun, s1, s2, d, width)) \
break; \
s1 = (type*)((uchar*)s1 + step1); \
s2 = (type*)((uchar*)s2 + step2); \
d = (type*)((uchar*)d + step); \
} \
if (i == height) \
{ \
CV_IMPL_ADD(CV_IMPL_IPP); \
return 1; \
} \
setIppErrorStatus(); \
return 0; \
} while(0)
//=======================================
// Max
//=======================================
inline int arithm_ipp_max8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height)
{
ARITHM_IPP_MIN_MAX(ippsMaxEvery_8u, uchar);
}
inline int arithm_ipp_max16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
ushort* dst, size_t step, int width, int height)
{
ARITHM_IPP_MIN_MAX(ippsMaxEvery_16u, ushort);
}
inline int arithm_ipp_max32f(const float* src1, size_t step1, const float* src2, size_t step2,
float* dst, size_t step, int width, int height)
{
ARITHM_IPP_MIN_MAX(ippsMaxEvery_32f, float);
}
inline int arithm_ipp_max64f(const double* src1, size_t step1, const double* src2, size_t step2,
double* dst, size_t step, int width, int height)
{
ARITHM_IPP_MIN_MAX(ippsMaxEvery_64f, double);
}
#define arithm_ipp_max8s(...) 0
#define arithm_ipp_max16s(...) 0
#define arithm_ipp_max32s(...) 0
//=======================================
// Min
//=======================================
inline int arithm_ipp_min8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height)
{
ARITHM_IPP_MIN_MAX(ippsMinEvery_8u, uchar);
}
inline int arithm_ipp_min16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
ushort* dst, size_t step, int width, int height)
{
ARITHM_IPP_MIN_MAX(ippsMinEvery_16u, ushort);
}
inline int arithm_ipp_min32f(const float* src1, size_t step1, const float* src2,size_t step2,
float* dst, size_t step, int width, int height)
{
ARITHM_IPP_MIN_MAX(ippsMinEvery_32f, float);
}
inline int arithm_ipp_min64f(const double* src1, size_t step1, const double* src2, size_t step2,
double* dst, size_t step, int width, int height)
{
ARITHM_IPP_MIN_MAX(ippsMinEvery_64f, double);
}
#define arithm_ipp_min8s(...) 0
#define arithm_ipp_min16s(...) 0
#define arithm_ipp_min32s(...) 0
//=======================================
// AbsDiff
//=======================================
inline int arithm_ipp_absdiff8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiAbsDiff_8u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
inline int arithm_ipp_absdiff16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
ushort* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiAbsDiff_16u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
inline int arithm_ipp_absdiff32f(const float* src1, size_t step1, const float* src2, size_t step2,
float* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiAbsDiff_32f_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
#define arithm_ipp_absdiff8s(...) 0
#define arithm_ipp_absdiff16s(...) 0
#define arithm_ipp_absdiff32s(...) 0
#define arithm_ipp_absdiff64f(...) 0
//=======================================
// Logical
//=======================================
inline int arithm_ipp_and8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiAnd_8u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
inline int arithm_ipp_or8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiOr_8u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
inline int arithm_ipp_xor8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiXor_8u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
inline int arithm_ipp_not8u(const uchar* src1, size_t step1, uchar* dst, size_t step, int width, int height)
{
if (!CV_IPP_CHECK_COND)
return 0;
if (height == 1)
step1 = step = width * sizeof(dst[0]);
if (0 <= CV_INSTRUMENT_FUN_IPP(ippiNot_8u_C1R, src1, (int)step1, dst, (int)step, ippiSize(width, height)))
{
CV_IMPL_ADD(CV_IMPL_IPP);
return 1;
}
setIppErrorStatus();
return 0;
}
//=======================================
// Compare
//=======================================
#define ARITHM_IPP_CMP(fun, ...) \
do { \
if (!CV_IPP_CHECK_COND) \
return 0; \
IppCmpOp op = arithm_ipp_convert_cmp(cmpop); \
if (op < 0) \
return 0; \
if (height == 1) \
step1 = step2 = step = width * sizeof(dst[0]); \
if (0 <= CV_INSTRUMENT_FUN_IPP(fun, __VA_ARGS__, op)) \
{ \
CV_IMPL_ADD(CV_IMPL_IPP); \
return 1; \
} \
setIppErrorStatus(); \
return 0; \
} while(0)
inline IppCmpOp arithm_ipp_convert_cmp(int cmpop)
{
switch(cmpop)
{
case CMP_EQ: return ippCmpEq;
case CMP_GT: return ippCmpGreater;
case CMP_GE: return ippCmpGreaterEq;
case CMP_LT: return ippCmpLess;
case CMP_LE: return ippCmpLessEq;
default: return (IppCmpOp)-1;
}
}
inline int arithm_ipp_cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height, int cmpop)
{
ARITHM_IPP_CMP(ippiCompare_8u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
inline int arithm_ipp_cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
uchar* dst, size_t step, int width, int height, int cmpop)
{
ARITHM_IPP_CMP(ippiCompare_16u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
inline int arithm_ipp_cmp16s(const short* src1, size_t step1, const short* src2, size_t step2,
uchar* dst, size_t step, int width, int height, int cmpop)
{
ARITHM_IPP_CMP(ippiCompare_16s_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
inline int arithm_ipp_cmp32f(const float* src1, size_t step1, const float* src2, size_t step2,
uchar* dst, size_t step, int width, int height, int cmpop)
{
ARITHM_IPP_CMP(ippiCompare_32f_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
#define arithm_ipp_cmp8s(...) 0
#define arithm_ipp_cmp32s(...) 0
#define arithm_ipp_cmp64f(...) 0
//=======================================
// Multiply
//=======================================
#define ARITHM_IPP_MUL(fun, ...) \
do { \
if (!CV_IPP_CHECK_COND) \
return 0; \
float fscale = (float)scale; \
if (std::fabs(fscale - 1) > FLT_EPSILON) \
return 0; \
if (0 <= CV_INSTRUMENT_FUN_IPP(fun, __VA_ARGS__)) \
{ \
CV_IMPL_ADD(CV_IMPL_IPP); \
return 1; \
} \
setIppErrorStatus(); \
return 0; \
} while(0)
inline int arithm_ipp_mul8u(const uchar *src1, size_t step1, const uchar *src2, size_t step2,
uchar *dst, size_t step, int width, int height, double scale)
{
ARITHM_IPP_MUL(ippiMul_8u_C1RSfs, src1, (int)step1, src2, (int)step2,dst, (int)step, ippiSize(width, height), 0);
}
inline int arithm_ipp_mul16u(const ushort *src1, size_t step1, const ushort *src2, size_t step2,
ushort *dst, size_t step, int width, int height, double scale)
{
ARITHM_IPP_MUL(ippiMul_16u_C1RSfs, src1, (int)step1, src2, (int)step2,dst, (int)step, ippiSize(width, height), 0);
}
inline int arithm_ipp_mul16s(const short *src1, size_t step1, const short *src2, size_t step2,
short *dst, size_t step, int width, int height, double scale)
{
ARITHM_IPP_MUL(ippiMul_16s_C1RSfs, src1, (int)step1, src2, (int)step2,dst, (int)step, ippiSize(width, height), 0);
}
inline int arithm_ipp_mul32f(const float *src1, size_t step1, const float *src2, size_t step2,
float *dst, size_t step, int width, int height, double scale)
{
ARITHM_IPP_MUL(ippiMul_32f_C1R, src1, (int)step1, src2, (int)step2,dst, (int)step, ippiSize(width, height));
}
#define arithm_ipp_mul8s(...) 0
#define arithm_ipp_mul32s(...) 0
#define arithm_ipp_mul64f(...) 0
//=======================================
// Div
//=======================================
#define arithm_ipp_div8u(...) 0
#define arithm_ipp_div8s(...) 0
#define arithm_ipp_div16u(...) 0
#define arithm_ipp_div16s(...) 0
#define arithm_ipp_div32s(...) 0
#define arithm_ipp_div32f(...) 0
#define arithm_ipp_div64f(...) 0
//=======================================
// AddWeighted
//=======================================
#define arithm_ipp_addWeighted8u(...) 0
#define arithm_ipp_addWeighted8s(...) 0
#define arithm_ipp_addWeighted16u(...) 0
#define arithm_ipp_addWeighted16s(...) 0
#define arithm_ipp_addWeighted32s(...) 0
#define arithm_ipp_addWeighted32f(...) 0
#define arithm_ipp_addWeighted64f(...) 0
//=======================================
// Reciprocal
//=======================================
#define arithm_ipp_recip8u(...) 0
#define arithm_ipp_recip8s(...) 0
#define arithm_ipp_recip16u(...) 0
#define arithm_ipp_recip16s(...) 0
#define arithm_ipp_recip32s(...) 0
#define arithm_ipp_recip32f(...) 0
#define arithm_ipp_recip64f(...) 0
/** empty block, in case you have "fun"
#define arithm_ipp_8u(...) 0
#define arithm_ipp_8s(...) 0
#define arithm_ipp_16u(...) 0
#define arithm_ipp_16s(...) 0
#define arithm_ipp_32s(...) 0
#define arithm_ipp_32f(...) 0
#define arithm_ipp_64f(...) 0
**/
}} // cv::hal::
#define ARITHM_CALL_IPP(fun, ...) \
{ \
if (__CV_EXPAND(fun(__VA_ARGS__))) \
return; \
}
#endif // ARITHM_USE_IPP
#if !ARITHM_USE_IPP
#define ARITHM_CALL_IPP(...)
#endif
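// Usage sketch (hypothetical caller; the real call sites are in arithm.simd.hpp):
//   void add8u(const uchar* s1, size_t st1, const uchar* s2, size_t st2,
//              uchar* d, size_t st, int w, int h)
//   {
//       ARITHM_CALL_IPP(arithm_ipp_add8u, s1, st1, s2, st2, d, st, w, h)
//       // ... universal-intrinsics fallback runs only if IPP returned 0
//   }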

modules/core/src/arithm_simd.hpp: file diff suppressed because it is too large.

@@ -86,7 +86,6 @@
#include "opencv2/core/sse_utils.hpp"
#include "opencv2/core/neon_utils.hpp"
#include "opencv2/core/vsx_utils.hpp"
#include "arithm_core.hpp"
#include "hal_replacement.hpp"
#ifdef HAVE_TEGRA_OPTIMIZATION
@@ -110,6 +109,102 @@ extern const uchar g_Saturate8u[];
#define CV_MIN_8U(a,b) ((a) - CV_FAST_CAST_8U((a) - (b)))
#define CV_MAX_8U(a,b) ((a) + CV_FAST_CAST_8U((b) - (a)))
template<typename T1, typename T2=T1, typename T3=T1> struct OpAdd
{
typedef T1 type1;
typedef T2 type2;
typedef T3 rtype;
T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(a + b); }
};
template<typename T1, typename T2=T1, typename T3=T1> struct OpSub
{
typedef T1 type1;
typedef T2 type2;
typedef T3 rtype;
T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(a - b); }
};
template<typename T1, typename T2=T1, typename T3=T1> struct OpRSub
{
typedef T1 type1;
typedef T2 type2;
typedef T3 rtype;
T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(b - a); }
};
template<typename T> struct OpMin
{
typedef T type1;
typedef T type2;
typedef T rtype;
T operator ()(const T a, const T b) const { return std::min(a, b); }
};
template<typename T> struct OpMax
{
typedef T type1;
typedef T type2;
typedef T rtype;
T operator ()(const T a, const T b) const { return std::max(a, b); }
};
template<typename T> struct OpAbsDiff
{
typedef T type1;
typedef T type2;
typedef T rtype;
T operator()(T a, T b) const { return a > b ? a - b : b - a; }
};
// specializations to prevent "-0" results
template<> struct OpAbsDiff<float>
{
typedef float type1;
typedef float type2;
typedef float rtype;
float operator()(float a, float b) const { return std::abs(a - b); }
};
template<> struct OpAbsDiff<double>
{
typedef double type1;
typedef double type2;
typedef double rtype;
double operator()(double a, double b) const { return std::abs(a - b); }
};
template<typename T> struct OpAnd
{
typedef T type1;
typedef T type2;
typedef T rtype;
T operator()( T a, T b ) const { return a & b; }
};
template<typename T> struct OpOr
{
typedef T type1;
typedef T type2;
typedef T rtype;
T operator()( T a, T b ) const { return a | b; }
};
template<typename T> struct OpXor
{
typedef T type1;
typedef T type2;
typedef T rtype;
T operator()( T a, T b ) const { return a ^ b; }
};
template<typename T> struct OpNot
{
typedef T type1;
typedef T type2;
typedef T rtype;
T operator()( T a, T ) const { return ~a; }
};
template<> inline uchar OpAdd<uchar>::operator ()(uchar a, uchar b) const
{ return CV_FAST_CAST_8U(a + b); }

@@ -119,11 +119,15 @@ template <typename R> struct Data
d[i] += (LaneType)m;
return *this;
}
-void fill(LaneType val)
+void fill(LaneType val, int s, int c = R::nlanes)
{
-for (int i = 0; i < R::nlanes; ++i)
+for (int i = s; i < c; ++i)
d[i] = val;
}
+void fill(LaneType val)
+{
+fill(val, 0);
+}
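// Note (explanatory): the new overload fills lanes [s, c); with the default
// c = R::nlanes it fills from lane s to the end, which test_pack_b below uses
// to zero the upper half of a vector.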
void reverse()
{
for (int i = 0; i < R::nlanes / 2; ++i)
@@ -739,6 +743,23 @@ template<typename R> struct TheTest
return *this;
}
TheTest & test_absdiffs()
{
Data<R> dataA(std::numeric_limits<LaneType>::max()),
dataB(std::numeric_limits<LaneType>::min());
dataA[0] = (LaneType)-1;
dataB[0] = 1;
dataA[1] = 2;
dataB[1] = (LaneType)-2;
R a = dataA, b = dataB;
Data<R> resC = v_absdiffs(a, b);
for (int i = 0; i < R::nlanes; ++i)
{
EXPECT_EQ(saturate_cast<LaneType>(std::abs(dataA[i] - dataB[i])), resC[i]);
}
return *this;
}
TheTest & test_reduce()
{
Data<R> dataA;
@@ -874,6 +895,81 @@ template<typename R> struct TheTest
return *this;
}
// v_uint8 only
TheTest & test_pack_b()
{
// 16-bit
Data<R> dataA, dataB;
dataB.fill(0, R::nlanes / 2);
R a = dataA, b = dataB;
Data<R> maskA = a == b, maskB = a != b;
a = maskA; b = maskB;
Data<R> res = v_pack_b(v_reinterpret_as_u16(a), v_reinterpret_as_u16(b));
for (int i = 0; i < v_uint16::nlanes; ++i)
{
SCOPED_TRACE(cv::format("i=%d", i));
EXPECT_EQ(maskA[i * 2], res[i]);
EXPECT_EQ(maskB[i * 2], res[i + v_uint16::nlanes]);
}
// 32-bit
Data<R> dataC, dataD;
dataD.fill(0, R::nlanes / 2);
R c = dataC, d = dataD;
Data<R> maskC = c == d, maskD = c != d;
c = maskC; d = maskD;
res = v_pack_b
(
v_reinterpret_as_u32(a), v_reinterpret_as_u32(b),
v_reinterpret_as_u32(c), v_reinterpret_as_u32(d)
);
for (int i = 0; i < v_uint32::nlanes; ++i)
{
SCOPED_TRACE(cv::format("i=%d", i));
EXPECT_EQ(maskA[i * 4], res[i]);
EXPECT_EQ(maskB[i * 4], res[i + v_uint32::nlanes]);
EXPECT_EQ(maskC[i * 4], res[i + v_uint32::nlanes * 2]);
EXPECT_EQ(maskD[i * 4], res[i + v_uint32::nlanes * 3]);
}
// 64-bit
Data<R> dataE, dataF, dataG(0), dataH(0xFF);
dataF.fill(0, R::nlanes / 2);
R e = dataE, f = dataF, g = dataG, h = dataH;
Data<R> maskE = e == f, maskF = e != f;
e = maskE; f = maskF;
res = v_pack_b
(
v_reinterpret_as_u64(a), v_reinterpret_as_u64(b),
v_reinterpret_as_u64(c), v_reinterpret_as_u64(d),
v_reinterpret_as_u64(e), v_reinterpret_as_u64(f),
v_reinterpret_as_u64(g), v_reinterpret_as_u64(h)
);
for (int i = 0; i < v_uint64::nlanes; ++i)
{
SCOPED_TRACE(cv::format("i=%d", i));
EXPECT_EQ(maskA[i * 8], res[i]);
EXPECT_EQ(maskB[i * 8], res[i + v_uint64::nlanes]);
EXPECT_EQ(maskC[i * 8], res[i + v_uint64::nlanes * 2]);
EXPECT_EQ(maskD[i * 8], res[i + v_uint64::nlanes * 3]);
EXPECT_EQ(maskE[i * 8], res[i + v_uint64::nlanes * 4]);
EXPECT_EQ(maskF[i * 8], res[i + v_uint64::nlanes * 5]);
EXPECT_EQ(dataG[i * 8], res[i + v_uint64::nlanes * 6]);
EXPECT_EQ(dataH[i * 8], res[i + v_uint64::nlanes * 7]);
}
return *this;
}
TheTest & test_unpack()
{
Data<R> dataA, dataB;
@@ -1228,6 +1324,7 @@ void test_hal_intrin_uint8()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
.test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>()
.test_pack_b()
.test_unpack()
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
@@ -1259,6 +1356,7 @@ void test_hal_intrin_int8()
.test_logic()
.test_min_max()
.test_absdiff()
.test_absdiffs()
.test_abs()
.test_mask()
.test_popcount()
@@ -1317,6 +1415,7 @@ void test_hal_intrin_int16()
.test_logic()
.test_min_max()
.test_absdiff()
.test_absdiffs()
.test_abs()
.test_reduce()
.test_mask()
