From b4716b1d92d907707b7810f00d1abb38e4faf8df Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 17 Jul 2017 15:02:14 +0300 Subject: [PATCH 1/2] core: fix convertTo() AVX2 optimization --- modules/core/src/convert.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp index 0955c78c5c..d63b53bcee 100644 --- a/modules/core/src/convert.cpp +++ b/modules/core/src/convert.cpp @@ -3678,7 +3678,7 @@ cvtScale_( const short* src, size_t sstep, if (CV_CPU_HAS_SUPPORT_AVX2) { opt_AVX2::cvtScale_s16s32f32Line_AVX2(src, dst, scale, shift, size.width); - return; + continue; } #endif #if CV_SSE2 From 4bb4a349c90646d0ffa74ec6c956475919d9aa7c Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 17 Jul 2017 15:12:41 +0300 Subject: [PATCH 2/2] imgproc: fix warp optimizations --- modules/imgproc/src/imgwarp.avx2.cpp | 2 ++ modules/imgproc/src/imgwarp.sse4_1.cpp | 41 +++++++++++++------------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/modules/imgproc/src/imgwarp.avx2.cpp b/modules/imgproc/src/imgwarp.avx2.cpp index d434807706..63ef2ae52d 100644 --- a/modules/imgproc/src/imgwarp.avx2.cpp +++ b/modules/imgproc/src/imgwarp.avx2.cpp @@ -125,6 +125,7 @@ public: } } } + _mm256_zeroupper(); } private: @@ -230,6 +231,7 @@ public: } } } + _mm256_zeroupper(); } private: diff --git a/modules/imgproc/src/imgwarp.sse4_1.cpp b/modules/imgproc/src/imgwarp.sse4_1.cpp index 79137d1230..49954b1dbc 100644 --- a/modules/imgproc/src/imgwarp.sse4_1.cpp +++ b/modules/imgproc/src/imgwarp.sse4_1.cpp @@ -387,19 +387,19 @@ class WarpPerspectiveLine_SSE4_Impl: public WarpPerspectiveLine_SSE4 public: WarpPerspectiveLine_SSE4_Impl(const double *M) { - v_M0 = _mm_set1_pd(M[0]); - v_M3 = _mm_set1_pd(M[3]); - v_M6 = _mm_set1_pd(M[6]); - v_intmax = _mm_set1_pd((double)INT_MAX); - v_intmin = _mm_set1_pd((double)INT_MIN); - v_2 = _mm_set1_pd(2); - v_zero = _mm_setzero_pd(); - v_1 = _mm_set1_pd(1); - v_its = _mm_set1_pd(INTER_TAB_SIZE); - v_itsi1 = _mm_set1_epi32(INTER_TAB_SIZE - 1); + CV_UNUSED(M); } virtual void processNN(const double *M, short* xy, double X0, double Y0, double W0, int bw) { + const __m128d v_M0 = _mm_set1_pd(M[0]); + const __m128d v_M3 = _mm_set1_pd(M[3]); + const __m128d v_M6 = _mm_set1_pd(M[6]); + const __m128d v_intmax = _mm_set1_pd((double)INT_MAX); + const __m128d v_intmin = _mm_set1_pd((double)INT_MIN); + const __m128d v_2 = _mm_set1_pd(2); + const __m128d v_zero = _mm_setzero_pd(); + const __m128d v_1 = _mm_set1_pd(1); + int x1 = 0; __m128d v_X0d = _mm_set1_pd(X0); __m128d v_Y0d = _mm_set1_pd(Y0); @@ -521,6 +521,16 @@ public: } virtual void process(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw) { + const __m128d v_M0 = _mm_set1_pd(M[0]); + const __m128d v_M3 = _mm_set1_pd(M[3]); + const __m128d v_M6 = _mm_set1_pd(M[6]); + const __m128d v_intmax = _mm_set1_pd((double)INT_MAX); + const __m128d v_intmin = _mm_set1_pd((double)INT_MIN); + const __m128d v_2 = _mm_set1_pd(2); + const __m128d v_zero = _mm_setzero_pd(); + const __m128d v_its = _mm_set1_pd(INTER_TAB_SIZE); + const __m128i v_itsi1 = _mm_set1_epi32(INTER_TAB_SIZE - 1); + int x1 = 0; __m128d v_X0d = _mm_set1_pd(X0); @@ -656,17 +666,6 @@ public: } } virtual ~WarpPerspectiveLine_SSE4_Impl() {}; -private: - __m128d v_M0; - __m128d v_M3; - __m128d v_M6; - __m128d v_intmax; - __m128d v_intmin; - __m128d v_2, - v_zero, - v_1, - v_its; - __m128i v_itsi1; }; Ptr WarpPerspectiveLine_SSE4::getImpl(const double *M)