Merge pull request #9024 from tomoaki0705:featureDispatchAccumulate

pull/9189/head
Alexander Alekhin 7 years ago
commit 10e6491c22
  1. 59
      modules/core/include/opencv2/core/hal/intrin_sse.hpp
  2. 1
      modules/imgproc/CMakeLists.txt
  3. 1713
      modules/imgproc/src/accum.cpp
  4. 20
      modules/imgproc/src/accum.dispatch.cpp
  5. 3187
      modules/imgproc/src/accum.simd.hpp

@ -899,6 +899,15 @@ inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
// Instantiate the OPENCV_HAL_IMPL_SSE_FLT_CMP_OP comparison operators for the
// single- and double-precision vector types. The second argument (ps / pd) is
// presumably the SSE intrinsic suffix -- confirm against the macro definition.
OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float32x4, ps)
OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float64x2, pd)
// Lane-wise == / != for the 64-bit integer vector types.
//
// The comparison is done purely in the integer domain. Reinterpreting the lanes
// as doubles and using the floating-point compare gives wrong answers for
// integers: a bit pattern that encodes NaN compares unequal to itself, and
// 0x0000000000000000 compares equal to 0x8000000000000000 (+0.0 vs -0.0).
//
// SSE2 has no 64-bit integer compare, so each 64-bit lane is compared as two
// 32-bit halves; the per-half masks are then AND-ed with a copy whose halves
// are swapped inside each 64-bit lane, so a lane is all-ones only when both
// halves matched. operator != is the bitwise complement of that mask.
//
// _Tpvec - vector type; must expose a __m128i member `val` and be
//          constructible from __m128i.
// cast   - no longer used by the implementation; kept so existing
//          instantiations of this macro keep compiling unchanged.
#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec, cast) \
inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
{ \
    const __m128i eq32 = _mm_cmpeq_epi32(a.val, b.val); \
    const __m128i eq32_swapped = _mm_shuffle_epi32(eq32, _MM_SHUFFLE(2, 3, 0, 1)); \
    return _Tpvec(_mm_and_si128(eq32, eq32_swapped)); \
} \
inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
{ \
    const __m128i eq32 = _mm_cmpeq_epi32(a.val, b.val); \
    const __m128i eq32_swapped = _mm_shuffle_epi32(eq32, _MM_SHUFFLE(2, 3, 0, 1)); \
    const __m128i eq64 = _mm_and_si128(eq32, eq32_swapped); \
    return _Tpvec(_mm_xor_si128(eq64, _mm_set1_epi32(-1))); \
}
OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2, v_reinterpret_as_u64)
OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2, v_reinterpret_as_s64)
// v_add_wrap for 8- and 16-bit lanes, bound directly to the plain
// _mm_add_epi8 / _mm_add_epi16 intrinsics via OPENCV_HAL_IMPL_SSE_BIN_FUNC.
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_add_wrap, _mm_add_epi8)
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int8x16, v_add_wrap, _mm_add_epi8)
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_add_wrap, _mm_add_epi16)
@ -1520,6 +1529,35 @@ inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4&
v_transpose4x4(u0, u1, u2, u3, a, b, c, d);
}
inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b, v_uint64x2& c)
{
    // Reads six consecutive 64-bit elements laid out as a0 b0 c0 a1 b1 c1
    // and splits them into a = {a0, a1}, b = {b0, b1}, c = {c0, c1}.
    const __m128i a0_b0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr));
    const __m128i c0_a1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr + 2));
    const __m128i b1_c1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr + 4));

    // Broadcast the high element of each register so it can be picked up by
    // the low-half unpack below.
    const __m128i a1_a1 = _mm_unpackhi_epi64(c0_a1, c0_a1);
    const __m128i b0_b0 = _mm_unpackhi_epi64(a0_b0, a0_b0);
    const __m128i c1_c1 = _mm_unpackhi_epi64(b1_c1, b1_c1);

    a = v_uint64x2(_mm_unpacklo_epi64(a0_b0, a1_a1));
    b = v_uint64x2(_mm_unpacklo_epi64(b0_b0, b1_c1));
    c = v_uint64x2(_mm_unpacklo_epi64(c0_a1, c1_c1));
}
inline void v_load_deinterleave(const int64 *ptr, v_int64x2& a, v_int64x2& b, v_int64x2& c)
{
v_uint64x2 t0, t1, t2;
v_load_deinterleave((const uint64*)ptr, t0, t1, t2);
a = v_reinterpret_as_s64(t0);
b = v_reinterpret_as_s64(t1);
c = v_reinterpret_as_s64(t2);
}
inline void v_load_deinterleave(const double *ptr, v_float64x2& a, v_float64x2& b, v_float64x2& c)
{
v_uint64x2 t0, t1, t2;
v_load_deinterleave((const uint64*)ptr, t0, t1, t2);
a = v_reinterpret_as_f64(t0);
b = v_reinterpret_as_f64(t1);
c = v_reinterpret_as_f64(t2);
}
// 2-channel, float only
inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b)
{
@ -1717,6 +1755,27 @@ inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32
_mm_storeu_ps((ptr + 4), u1);
}
inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c)
{
    // Writes a = {a0, a1}, b = {b0, b1}, c = {c0, c1} to memory as
    // a0 b0 c0 a1 b1 c1 (six consecutive 64-bit elements).
    const __m128i a1_a1 = _mm_unpackhi_epi64(a.val, a.val);

    const __m128i a0_b0 = _mm_unpacklo_epi64(a.val, b.val);
    const __m128i c0_a1 = _mm_unpacklo_epi64(c.val, a1_a1);
    const __m128i b1_c1 = _mm_unpackhi_epi64(b.val, c.val);

    _mm_storeu_si128(reinterpret_cast<__m128i*>(ptr), a0_b0);
    _mm_storeu_si128(reinterpret_cast<__m128i*>(ptr + 2), c0_a1);
    _mm_storeu_si128(reinterpret_cast<__m128i*>(ptr + 4), b1_c1);
}
inline void v_store_interleave(int64 *ptr, const v_int64x2& a, const v_int64x2& b, const v_int64x2& c)
{
    // Signed variant: reinterpret each channel as unsigned and forward to the
    // unsigned 64-bit overload.
    const v_uint64x2 chan_a = v_reinterpret_as_u64(a);
    const v_uint64x2 chan_b = v_reinterpret_as_u64(b);
    const v_uint64x2 chan_c = v_reinterpret_as_u64(c);
    v_store_interleave(reinterpret_cast<uint64*>(ptr), chan_a, chan_b, chan_c);
}
inline void v_store_interleave(double *ptr, const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
{
v_store_interleave((uint64*)ptr, v_reinterpret_as_u64(a), v_reinterpret_as_u64(b), v_reinterpret_as_u64(c));
}
#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec, _Tp, suffix, _Tpuvec, _Tpu, usuffix) \
inline void v_load_deinterleave( const _Tp* ptr, _Tpvec& a0, \
_Tpvec& b0, _Tpvec& c0 ) \

@ -1,2 +1,3 @@
# Human-readable description of the imgproc module.
set(the_description "Image Processing")
# Register "accum" as a CPU-dispatched source for the SSE2, AVX and NEON targets.
# NOTE(review): accum.dispatch.cpp includes accum.simd_declarations.hpp, which
# is presumably generated from this call -- confirm.
ocv_add_dispatched_file(accum SSE2 AVX NEON)
# Define the imgproc module: depends on opencv_core, wrapped for java and python.
ocv_define_module(imgproc opencv_core WRAP java python)

File diff suppressed because it is too large Load Diff

@ -0,0 +1,20 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "precomp.hpp"
#include "accum.simd.hpp"
#include "accum.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
// Instantiates the accumulate function families inside namespace cv.
// DEF_ACC_INT_FUNCS / DEF_ACC_FLT_FUNCS are not defined in this file; they are
// presumably provided by accum.simd.hpp -- confirm. The arguments appear to be
// (type suffix, source element type, accumulator element type).
namespace cv {
// Integer sources (uchar / ushort) accumulated into float or double.
DEF_ACC_INT_FUNCS(8u32f, uchar, float)
DEF_ACC_INT_FUNCS(8u64f, uchar, double)
DEF_ACC_INT_FUNCS(16u32f, ushort, float)
DEF_ACC_INT_FUNCS(16u64f, ushort, double)
// Floating-point sources (float / double) accumulated into float or double.
DEF_ACC_FLT_FUNCS(32f, float, float)
DEF_ACC_FLT_FUNCS(32f64f, float, double)
DEF_ACC_FLT_FUNCS(64f, double, double)
} // namespace cv

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save