From 44e9fb306d7972e6f222baee173a2937ec1e02d1 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 21 Apr 2020 04:14:18 +0000 Subject: [PATCH 01/20] features2d(sift): code from nonfree module --- modules/{nonfree => features2d}/src/sift.cpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename modules/{nonfree => features2d}/src/sift.cpp (100%) diff --git a/modules/nonfree/src/sift.cpp b/modules/features2d/src/sift.cpp similarity index 100% rename from modules/nonfree/src/sift.cpp rename to modules/features2d/src/sift.cpp From df10411e05a9f4e6fe3085cd9cb84b4bce32ba24 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 21 Apr 2020 06:19:16 +0000 Subject: [PATCH 02/20] features2d(sift): patent expiration note --- modules/features2d/src/sift.cpp | 45 +++++---------------------------- 1 file changed, 6 insertions(+), 39 deletions(-) diff --git a/modules/features2d/src/sift.cpp b/modules/features2d/src/sift.cpp index 2112971e94..a627502404 100644 --- a/modules/features2d/src/sift.cpp +++ b/modules/features2d/src/sift.cpp @@ -1,48 +1,15 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. // -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (c) 2006-2010, Rob Hess // Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ +// Copyright (C) 2020, Intel Corporation, all rights reserved. /**********************************************************************************************\ Implementation of SIFT is based on the code from http://blogs.oregonstate.edu/hess/code/sift/ Below is the original copyright. + Patent US6711293 expired in March 2020. // Copyright (c) 2006-2010, Rob Hess // All rights reserved. From e3654d54162419b1d785ac950430df6a4b4cc509 Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Thu, 16 Oct 2014 16:33:21 +0400 Subject: [PATCH 03/20] [move sift.cpp] refactored xfeatures2d in the same style as features2d original commit: https://github.com/opencv/opencv_contrib/commit/0cfd795303c414aada6d10701e0de4995841210c --- modules/features2d/src/sift.cpp | 82 ++++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 26 deletions(-) diff --git a/modules/features2d/src/sift.cpp b/modules/features2d/src/sift.cpp index a627502404..850bd4f3f5 100644 --- a/modules/features2d/src/sift.cpp +++ b/modules/features2d/src/sift.cpp @@ -76,6 +76,53 @@ namespace cv { +/*! + SIFT implementation. + + The class implements SIFT algorithm by D. Lowe. + */ +class SIFT_Impl : public SIFT +{ +public: + explicit SIFT_Impl( int nfeatures = 0, int nOctaveLayers = 3, + double contrastThreshold = 0.04, double edgeThreshold = 10, + double sigma = 1.6); + + //! returns the descriptor size in floats (128) + int descriptorSize() const; + + //! returns the descriptor type + int descriptorType() const; + + //! returns the default norm type + int defaultNorm() const; + + //! finds the keypoints and computes descriptors for them using SIFT algorithm. + //! Optionally it can compute descriptors for the user-provided keypoints + void detectAndCompute(InputArray img, InputArray mask, + std::vector& keypoints, + OutputArray descriptors, + bool useProvidedKeypoints = false); + + void buildGaussianPyramid( const Mat& base, std::vector& pyr, int nOctaves ) const; + void buildDoGPyramid( const std::vector& pyr, std::vector& dogpyr ) const; + void findScaleSpaceExtrema( const std::vector& gauss_pyr, const std::vector& dog_pyr, + std::vector& keypoints ) const; + +protected: + CV_PROP_RW int nfeatures; + CV_PROP_RW int nOctaveLayers; + CV_PROP_RW double contrastThreshold; + CV_PROP_RW double edgeThreshold; + CV_PROP_RW double sigma; +}; + +Ptr SIFT::create( int _nfeatures, int _nOctaveLayers, + double _contrastThreshold, double _edgeThreshold, double _sigma ) +{ + return makePtr(_nfeatures, _nOctaveLayers, _contrastThreshold, _edgeThreshold, _sigma); +} + /******************************* Defs and macros *****************************/ // default width of descriptor histogram array @@ -161,7 +208,7 @@ static Mat createInitialImage( const Mat& img, bool doubleImageSize, float sigma } -void SIFT::buildGaussianPyramid( const Mat& base, std::vector& pyr, int nOctaves ) const +void SIFT_Impl::buildGaussianPyramid( const Mat& base, std::vector& pyr, int nOctaves ) const { std::vector sig(nOctaveLayers + 3); pyr.resize(nOctaves*(nOctaveLayers + 3)); @@ -201,7 +248,7 @@ void SIFT::buildGaussianPyramid( const Mat& base, std::vector& pyr, int nOc } -void SIFT::buildDoGPyramid( const std::vector& gpyr, std::vector& dogpyr ) const +void SIFT_Impl::buildDoGPyramid( const std::vector& gpyr, std::vector& dogpyr ) const { int nOctaves = (int)gpyr.size()/(nOctaveLayers + 3); dogpyr.resize( nOctaves*(nOctaveLayers + 2) ); @@ -399,7 +446,7 @@ static bool adjustLocalExtrema( const std::vector& dog_pyr, KeyPoint& kpt, // // Detects features at extrema in DoG scale space. Bad features are discarded // based on contrast and ratio of principal curvatures. -void SIFT::findScaleSpaceExtrema( const std::vector& gauss_pyr, const std::vector& dog_pyr, +void SIFT_Impl::findScaleSpaceExtrema( const std::vector& gauss_pyr, const std::vector& dog_pyr, std::vector& keypoints ) const { int nOctaves = (int)gauss_pyr.size()/(nOctaveLayers + 3); @@ -652,40 +699,33 @@ static void calcDescriptors(const std::vector& gpyr, const std::vector& keypoints) const -{ - (*this)(_image, _mask, keypoints, noArray()); -} - - -void SIFT::operator()(InputArray _image, InputArray _mask, +void SIFT_Impl::detectAndCompute(InputArray _image, InputArray _mask, std::vector& keypoints, OutputArray _descriptors, - bool useProvidedKeypoints) const + bool useProvidedKeypoints) { int firstOctave = -1, actualNOctaves = 0, actualNLayers = 0; Mat image = _image.getMat(), mask = _mask.getMat(); @@ -770,14 +810,4 @@ void SIFT::operator()(InputArray _image, InputArray _mask, } } -void SIFT::detectImpl( InputArray image, std::vector& keypoints, InputArray mask) const -{ - (*this)(image.getMat(), mask.getMat(), keypoints, noArray()); -} - -void SIFT::computeImpl( InputArray image, std::vector& keypoints, OutputArray descriptors) const -{ - (*this)(image, Mat(), keypoints, descriptors, true); -} - } From 9fc872b70bf326335a2903dbf96ad85e800fea95 Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Thu, 16 Apr 2015 22:52:05 +0300 Subject: [PATCH 04/20] [move sift.cpp] fixed contrib code to match the HAL original commit: https://github.com/opencv/opencv_contrib/commit/cdddcc8237627f667d66daffb3fcb3af39a3e673 --- modules/features2d/src/sift.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/features2d/src/sift.cpp b/modules/features2d/src/sift.cpp index 850bd4f3f5..53f9ccba16 100644 --- a/modules/features2d/src/sift.cpp +++ b/modules/features2d/src/sift.cpp @@ -302,9 +302,9 @@ static float calcOrientationHist( const Mat& img, Point pt, int radius, len = k; // compute gradient values, orientations and the weights over the pixel neighborhood - exp(W, W, len); - fastAtan2(Y, X, Ori, len, true); - magnitude(X, Y, Mag, len); + hal::exp(W, W, len); + hal::fastAtan2(Y, X, Ori, len, true); + hal::magnitude(X, Y, Mag, len); for( k = 0; k < len; k++ ) { @@ -585,9 +585,9 @@ static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float sc } len = k; - fastAtan2(Y, X, Ori, len, true); - magnitude(X, Y, Mag, len); - exp(W, W, len); + hal::fastAtan2(Y, X, Ori, len, true); + hal::magnitude(X, Y, Mag, len); + hal::exp(W, W, len); for( k = 0; k < len; k++ ) { From 24284d3d17ed223a757323e4be3049e899d5e65c Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Tue, 15 Dec 2015 17:51:08 +0300 Subject: [PATCH 05/20] [move sift.cpp] Fixed HAL headers location original commit: https://github.com/opencv/opencv_contrib/commit/f529a1df2b17bcada1179ddcaf1352e3a5ba97b8 --- modules/features2d/src/sift.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/modules/features2d/src/sift.cpp b/modules/features2d/src/sift.cpp index 53f9ccba16..9c3d2117b7 100644 --- a/modules/features2d/src/sift.cpp +++ b/modules/features2d/src/sift.cpp @@ -72,6 +72,7 @@ #include "precomp.hpp" #include #include +#include namespace cv { @@ -302,9 +303,9 @@ static float calcOrientationHist( const Mat& img, Point pt, int radius, len = k; // compute gradient values, orientations and the weights over the pixel neighborhood - hal::exp(W, W, len); - hal::fastAtan2(Y, X, Ori, len, true); - hal::magnitude(X, Y, Mag, len); + cv::hal::exp32f(W, W, len); + cv::hal::fastAtan2(Y, X, Ori, len, true); + cv::hal::magnitude32f(X, Y, Mag, len); for( k = 0; k < len; k++ ) { @@ -585,9 +586,9 @@ static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float sc } len = k; - hal::fastAtan2(Y, X, Ori, len, true); - hal::magnitude(X, Y, Mag, len); - hal::exp(W, W, len); + cv::hal::fastAtan2(Y, X, Ori, len, true); + cv::hal::magnitude32f(X, Y, Mag, len); + cv::hal::exp32f(W, W, len); for( k = 0; k < len; k++ ) { From 99d914ea3b16475a55529210da3b36120aefec8d Mon Sep 17 00:00:00 2001 From: Martin Nowak Date: Sat, 11 Jun 2016 17:51:46 +0200 Subject: [PATCH 06/20] [move sift.cpp] fix overflow issue when computing diagonal - with big images the int multiplication can overflow original commit: https://github.com/opencv/opencv_contrib/commit/d4df727d380887fdd880fdb5430cf4680a4ad19b --- modules/features2d/src/sift.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/features2d/src/sift.cpp b/modules/features2d/src/sift.cpp index 9c3d2117b7..f14577bc35 100644 --- a/modules/features2d/src/sift.cpp +++ b/modules/features2d/src/sift.cpp @@ -544,7 +544,7 @@ static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float sc float hist_width = SIFT_DESCR_SCL_FCTR * scl; int radius = cvRound(hist_width * 1.4142135623730951f * (d + 1) * 0.5f); // Clip the radius to the diagonal of the image to avoid autobuffer too large exception - radius = std::min(radius, (int) sqrt((double) img.cols*img.cols + img.rows*img.rows)); + radius = std::min(radius, (int) sqrt(((double) img.cols)*img.cols + ((double) img.rows)*img.rows)); cos_t /= hist_width; sin_t /= hist_width; From 8be0a3452ded4c8456a1c112ee0def2c37bcad75 Mon Sep 17 00:00:00 2001 From: Suleyman TURKMEN Date: Thu, 22 Dec 2016 23:29:12 +0300 Subject: [PATCH 07/20] [move sift.cpp] Update sift.cpp original commit: https://github.com/opencv/opencv_contrib/commit/cb7b59f203bf06586d6176ac812e0ee382cedcf1 --- modules/features2d/src/sift.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/features2d/src/sift.cpp b/modules/features2d/src/sift.cpp index f14577bc35..380aa6f138 100644 --- a/modules/features2d/src/sift.cpp +++ b/modules/features2d/src/sift.cpp @@ -185,10 +185,12 @@ static Mat createInitialImage( const Mat& img, bool doubleImageSize, float sigma { Mat gray, gray_fpt; if( img.channels() == 3 || img.channels() == 4 ) + { cvtColor(img, gray, COLOR_BGR2GRAY); + gray.convertTo(gray_fpt, DataType::type, SIFT_FIXPT_SCALE, 0); + } else - img.copyTo(gray); - gray.convertTo(gray_fpt, DataType::type, SIFT_FIXPT_SCALE, 0); + img.convertTo(gray_fpt, DataType::type, SIFT_FIXPT_SCALE, 0); float sig_diff; @@ -196,7 +198,7 @@ static Mat createInitialImage( const Mat& img, bool doubleImageSize, float sigma { sig_diff = sqrtf( std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA * 4, 0.01f) ); Mat dbl; - resize(gray_fpt, dbl, Size(gray.cols*2, gray.rows*2), 0, 0, INTER_LINEAR); + resize(gray_fpt, dbl, Size(gray_fpt.cols*2, gray_fpt.rows*2), 0, 0, INTER_LINEAR); GaussianBlur(dbl, dbl, Size(), sig_diff, sig_diff); return dbl; } From 546239a3a86d76db0aad6b5c72a200d9ea2ff8c4 Mon Sep 17 00:00:00 2001 From: Woody Chow Date: Wed, 8 Mar 2017 10:08:50 +0900 Subject: [PATCH 08/20] [move sift.cpp] Optimize SIFT with AVX2 original commit: https://github.com/opencv/opencv_contrib/commit/c5e55dfde96307fef12fc55f63d6a600fd784582 --- modules/features2d/src/sift.cpp | 235 +++++++++++++++++++++++++++++++- 1 file changed, 229 insertions(+), 6 deletions(-) diff --git a/modules/features2d/src/sift.cpp b/modules/features2d/src/sift.cpp index 380aa6f138..49b9c495c7 100644 --- a/modules/features2d/src/sift.cpp +++ b/modules/features2d/src/sift.cpp @@ -309,7 +309,40 @@ static float calcOrientationHist( const Mat& img, Point pt, int radius, cv::hal::fastAtan2(Y, X, Ori, len, true); cv::hal::magnitude32f(X, Y, Mag, len); - for( k = 0; k < len; k++ ) + k = 0; +#if CV_AVX2 + if( USE_AVX2 ) + { + __m256 __nd360 = _mm256_set1_ps(n/360.f); + __m256i __n = _mm256_set1_epi32(n); + int CV_DECL_ALIGNED(32) bin_buf[8]; + float CV_DECL_ALIGNED(32) w_mul_mag_buf[8]; + for ( ; k <= len - 8; k+=8 ) + { + __m256i __bin = _mm256_cvtps_epi32(_mm256_round_ps(_mm256_mul_ps(__nd360, _mm256_loadu_ps(&Ori[k])), _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)); + + __bin = _mm256_sub_epi32(__bin, + _mm256_and_si256(__n, _mm256_or_si256(_mm256_cmpeq_epi32(__bin, __n), _mm256_cmpgt_epi32(__bin, __n)))); + __bin = _mm256_add_epi32(__bin, + _mm256_and_si256(__n, _mm256_cmpgt_epi32(_mm256_setzero_si256(), __bin))); + + __m256 __w_mul_mag = _mm256_mul_ps(_mm256_loadu_ps(&W[k]), _mm256_loadu_ps(&Mag[k])); + + _mm256_store_si256((__m256i *) bin_buf, __bin); + _mm256_store_ps(w_mul_mag_buf, __w_mul_mag); + + temphist[bin_buf[0]] += w_mul_mag_buf[0]; + temphist[bin_buf[1]] += w_mul_mag_buf[1]; + temphist[bin_buf[2]] += w_mul_mag_buf[2]; + temphist[bin_buf[3]] += w_mul_mag_buf[3]; + temphist[bin_buf[4]] += w_mul_mag_buf[4]; + temphist[bin_buf[5]] += w_mul_mag_buf[5]; + temphist[bin_buf[6]] += w_mul_mag_buf[6]; + temphist[bin_buf[7]] += w_mul_mag_buf[7]; + } + } +#endif + for( ; k < len; k++ ) { int bin = cvRound((n/360.f)*Ori[k]); if( bin >= n ) @@ -324,7 +357,40 @@ static float calcOrientationHist( const Mat& img, Point pt, int radius, temphist[-2] = temphist[n-2]; temphist[n] = temphist[0]; temphist[n+1] = temphist[1]; - for( i = 0; i < n; i++ ) + + i = 0; +#if CV_AVX2 + if( USE_AVX2 ) + { + __m256 __d_1_16 = _mm256_set1_ps(1.f/16.f); + __m256 __d_4_16 = _mm256_set1_ps(4.f/16.f); + __m256 __d_6_16 = _mm256_set1_ps(6.f/16.f); + for( ; i <= n - 8; i+=8 ) + { +#if CV_FMA3 + __m256 __hist = _mm256_fmadd_ps( + _mm256_add_ps(_mm256_loadu_ps(&temphist[i-2]), _mm256_loadu_ps(&temphist[i+2])), + __d_1_16, + _mm256_fmadd_ps( + _mm256_add_ps(_mm256_loadu_ps(&temphist[i-1]), _mm256_loadu_ps(&temphist[i+1])), + __d_4_16, + _mm256_mul_ps(_mm256_loadu_ps(&temphist[i]), __d_6_16))); +#else + __m256 __hist = _mm256_add_ps( + _mm256_mul_ps( + _mm256_add_ps(_mm256_loadu_ps(&temphist[i-2]), _mm256_loadu_ps(&temphist[i+2])), + __d_1_16), + _mm256_add_ps( + _mm256_mul_ps( + _mm256_add_ps(_mm256_loadu_ps(&temphist[i-1]), _mm256_loadu_ps(&temphist[i+1])), + __d_4_16), + _mm256_mul_ps(_mm256_loadu_ps(&temphist[i]), __d_6_16))); +#endif + _mm256_storeu_ps(&hist[i], __hist); + } + } +#endif + for( ; i < n; i++ ) { hist[i] = (temphist[i-2] + temphist[i+2])*(1.f/16.f) + (temphist[i-1] + temphist[i+1])*(4.f/16.f) + @@ -592,7 +658,99 @@ static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float sc cv::hal::magnitude32f(X, Y, Mag, len); cv::hal::exp32f(W, W, len); - for( k = 0; k < len; k++ ) + k = 0; +#if CV_AVX2 + if( USE_AVX2 ) + { + int CV_DECL_ALIGNED(32) idx_buf[8]; + float CV_DECL_ALIGNED(32) rco_buf[64]; + __m256 __ori = _mm256_set1_ps(ori); + __m256 __bins_per_rad = _mm256_set1_ps(bins_per_rad); + __m256i __n = _mm256_set1_epi32(n); + for( ; k <= len - 8; k+=8 ) + { + __m256 __rbin = _mm256_loadu_ps(&RBin[k]); + __m256 __cbin = _mm256_loadu_ps(&CBin[k]); + __m256 __obin = _mm256_mul_ps(_mm256_sub_ps(_mm256_loadu_ps(&Ori[k]), __ori), __bins_per_rad); + __m256 __mag = _mm256_mul_ps(_mm256_loadu_ps(&Mag[k]), _mm256_loadu_ps(&W[k])); + + __m256 __r0 = _mm256_floor_ps(__rbin); + __rbin = _mm256_sub_ps(__rbin, __r0); + __m256 __c0 = _mm256_floor_ps(__cbin); + __cbin = _mm256_sub_ps(__cbin, __c0); + __m256 __o0 = _mm256_floor_ps(__obin); + __obin = _mm256_sub_ps(__obin, __o0); + + __m256i __o0i = _mm256_cvtps_epi32(__o0); + // _o0 += (o0 < 0) * n + __o0i = _mm256_add_epi32(__o0i, _mm256_and_si256(__n, _mm256_cmpgt_epi32(_mm256_setzero_si256(), __o0i))); + __o0i = _mm256_sub_epi32(__o0i, + _mm256_and_si256(__n, _mm256_or_si256(_mm256_cmpeq_epi32(__o0i, __n), _mm256_cmpgt_epi32(__o0i, __n)))); + + __m256 __v_r1 = _mm256_mul_ps(__mag, __rbin); + __m256 __v_r0 = _mm256_sub_ps(__mag, __v_r1); + + __m256 __v_rc11 = _mm256_mul_ps(__v_r1, __cbin); + __m256 __v_rc10 = _mm256_sub_ps(__v_r1, __v_rc11); + + __m256 __v_rc01 = _mm256_mul_ps(__v_r0, __cbin); + __m256 __v_rc00 = _mm256_sub_ps(__v_r0, __v_rc01); + + __m256 __v_rco111 = _mm256_mul_ps(__v_rc11, __obin); + __m256 __v_rco110 = _mm256_sub_ps(__v_rc11, __v_rco111); + + __m256 __v_rco101 = _mm256_mul_ps(__v_rc10, __obin); + __m256 __v_rco100 = _mm256_sub_ps(__v_rc10, __v_rco101); + + __m256 __v_rco011 = _mm256_mul_ps(__v_rc01, __obin); + __m256 __v_rco010 = _mm256_sub_ps(__v_rc01, __v_rco011); + + __m256 __v_rco001 = _mm256_mul_ps(__v_rc00, __obin); + __m256 __v_rco000 = _mm256_sub_ps(__v_rc00, __v_rco001); + + __m256i __one = _mm256_set1_epi32(1); + __m256i __idx = _mm256_add_epi32( + _mm256_mullo_epi32( + _mm256_add_epi32( + _mm256_mullo_epi32(_mm256_add_epi32(_mm256_cvtps_epi32(__r0), __one), _mm256_set1_epi32(d + 2)), + _mm256_add_epi32(_mm256_cvtps_epi32(__c0), __one)), + _mm256_set1_epi32(n + 2)), + __o0i); + + _mm256_store_si256((__m256i *)idx_buf, __idx); + + _mm256_store_ps(&(rco_buf[0]), __v_rco000); + _mm256_store_ps(&(rco_buf[8]), __v_rco001); + _mm256_store_ps(&(rco_buf[16]), __v_rco010); + _mm256_store_ps(&(rco_buf[24]), __v_rco011); + _mm256_store_ps(&(rco_buf[32]), __v_rco100); + _mm256_store_ps(&(rco_buf[40]), __v_rco101); + _mm256_store_ps(&(rco_buf[48]), __v_rco110); + _mm256_store_ps(&(rco_buf[56]), __v_rco111); + #define HIST_SUM_HELPER(id) \ + hist[idx_buf[(id)]] += rco_buf[(id)]; \ + hist[idx_buf[(id)]+1] += rco_buf[8 + (id)]; \ + hist[idx_buf[(id)]+(n+2)] += rco_buf[16 + (id)]; \ + hist[idx_buf[(id)]+(n+3)] += rco_buf[24 + (id)]; \ + hist[idx_buf[(id)]+(d+2)*(n+2)] += rco_buf[32 + (id)]; \ + hist[idx_buf[(id)]+(d+2)*(n+2)+1] += rco_buf[40 + (id)]; \ + hist[idx_buf[(id)]+(d+3)*(n+2)] += rco_buf[48 + (id)]; \ + hist[idx_buf[(id)]+(d+3)*(n+2)+1] += rco_buf[56 + (id)]; + + HIST_SUM_HELPER(0); + HIST_SUM_HELPER(1); + HIST_SUM_HELPER(2); + HIST_SUM_HELPER(3); + HIST_SUM_HELPER(4); + HIST_SUM_HELPER(5); + HIST_SUM_HELPER(6); + HIST_SUM_HELPER(7); + + #undef HIST_SUM_HELPER + } + } +#endif + for( ; k < len; k++ ) { float rbin = RBin[k], cbin = CBin[k]; float obin = (Ori[k] - ori)*bins_per_rad; @@ -646,10 +804,59 @@ static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float sc // to byte array float nrm2 = 0; len = d*d*n; - for( k = 0; k < len; k++ ) + k = 0; +#if CV_AVX2 + if( USE_AVX2 ) + { + float CV_DECL_ALIGNED(32) nrm2_buf[8]; + __m256 __nrm2 = _mm256_setzero_ps(); + __m256 __dst; + for( ; k <= len - 8; k += 8 ) + { + __dst = _mm256_loadu_ps(&dst[k]); +#if CV_FMA3 + __nrm2 = _mm256_fmadd_ps(__dst, __dst, __nrm2); +#else + __nrm2 = _mm256_add_ps(__nrm2, _mm256_mul_ps(__dst, __dst)); +#endif + } + _mm256_store_ps(nrm2_buf, __nrm2); + nrm2 = nrm2_buf[0] + nrm2_buf[1] + nrm2_buf[2] + nrm2_buf[3] + + nrm2_buf[4] + nrm2_buf[5] + nrm2_buf[6] + nrm2_buf[7]; + } +#endif + for( ; k < len; k++ ) nrm2 += dst[k]*dst[k]; + float thr = std::sqrt(nrm2)*SIFT_DESCR_MAG_THR; - for( i = 0, nrm2 = 0; i < k; i++ ) + + i = 0, nrm2 = 0; +#if 0 //CV_AVX2 + // This code cannot be enabled because it sums nrm2 in a different order, + // thus producing slightly different results + if( USE_AVX2 ) + { + float CV_DECL_ALIGNED(32) nrm2_buf[8]; + __m256 __dst; + __m256 __nrm2 = _mm256_setzero_ps(); + __m256 __thr = _mm256_set1_ps(thr); + for( ; i <= len - 8; i += 8 ) + { + __dst = _mm256_loadu_ps(&dst[i]); + __dst = _mm256_min_ps(__dst, __thr); + _mm256_storeu_ps(&dst[i], __dst); +#if CV_FMA3 + __nrm2 = _mm256_fmadd_ps(__dst, __dst, __nrm2); +#else + __nrm2 = _mm256_add_ps(__nrm2, _mm256_mul_ps(__dst, __dst)); +#endif + } + _mm256_store_ps(nrm2_buf, __nrm2); + nrm2 = nrm2_buf[0] + nrm2_buf[1] + nrm2_buf[2] + nrm2_buf[3] + + nrm2_buf[4] + nrm2_buf[5] + nrm2_buf[6] + nrm2_buf[7]; + } +#endif + for( ; i < len; i++ ) { float val = std::min(dst[i], thr); dst[i] = val; @@ -658,7 +865,23 @@ static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float sc nrm2 = SIFT_INT_DESCR_FCTR/std::max(std::sqrt(nrm2), FLT_EPSILON); #if 1 - for( k = 0; k < len; k++ ) + k = 0; +#if CV_AVX2 + if( USE_AVX2 ) + { + __m256 __dst; + __m256 __min = _mm256_setzero_ps(); + __m256 __max = _mm256_set1_ps(255.0f); // max of uchar + __m256 __nrm2 = _mm256_set1_ps(nrm2); + for( k = 0; k <= len - 8; k+=8 ) + { + __dst = _mm256_loadu_ps(&dst[k]); + __dst = _mm256_min_ps(_mm256_max_ps(_mm256_round_ps(_mm256_mul_ps(__dst, __nrm2), _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC), __min), __max); + _mm256_storeu_ps(&dst[k], __dst); + } + } +#endif + for( ; k < len; k++ ) { dst[k] = saturate_cast(dst[k]*nrm2); } From 4b64955a12d39150a865ed5fef8453613eaf103d Mon Sep 17 00:00:00 2001 From: Woody Chow Date: Fri, 24 Mar 2017 16:31:18 +0900 Subject: [PATCH 09/20] [move sift.cpp] Parallelize calcDescriptors and buildDoGPyramid. Simplify 2 lines of AVX2 instructions original commit: https://github.com/opencv/opencv_contrib/commit/443f68cb71128d5ae27e8771a91f7641f9450685 --- modules/features2d/src/sift.cpp | 119 ++++++++++++++++++++++---------- 1 file changed, 84 insertions(+), 35 deletions(-) diff --git a/modules/features2d/src/sift.cpp b/modules/features2d/src/sift.cpp index 49b9c495c7..50ea4dcc66 100644 --- a/modules/features2d/src/sift.cpp +++ b/modules/features2d/src/sift.cpp @@ -251,23 +251,47 @@ void SIFT_Impl::buildGaussianPyramid( const Mat& base, std::vector& pyr, in } -void SIFT_Impl::buildDoGPyramid( const std::vector& gpyr, std::vector& dogpyr ) const +class buildDoGPyramidComputer : public ParallelLoopBody { - int nOctaves = (int)gpyr.size()/(nOctaveLayers + 3); - dogpyr.resize( nOctaves*(nOctaveLayers + 2) ); - - for( int o = 0; o < nOctaves; o++ ) +public: + buildDoGPyramidComputer( + int _nOctaveLayers, + const std::vector& _gpyr, + std::vector& _dogpyr) + : nOctaveLayers(_nOctaveLayers), + gpyr(_gpyr), + dogpyr(_dogpyr) { } + + void operator()( const cv::Range& range ) const { - for( int i = 0; i < nOctaveLayers + 2; i++ ) + const int begin = range.start; + const int end = range.end; + + for( int a = begin; a < end; a++ ) { + const int o = a / (nOctaveLayers + 2); + const int i = a % (nOctaveLayers + 2); + const Mat& src1 = gpyr[o*(nOctaveLayers + 3) + i]; const Mat& src2 = gpyr[o*(nOctaveLayers + 3) + i + 1]; Mat& dst = dogpyr[o*(nOctaveLayers + 2) + i]; subtract(src2, src1, dst, noArray(), DataType::type); } } -} +private: + int nOctaveLayers; + const std::vector& gpyr; + std::vector& dogpyr; +}; + +void SIFT_Impl::buildDoGPyramid( const std::vector& gpyr, std::vector& dogpyr ) const +{ + int nOctaves = (int)gpyr.size()/(nOctaveLayers + 3); + dogpyr.resize( nOctaves*(nOctaveLayers + 2) ); + + parallel_for_(Range(0, nOctaves * (nOctaveLayers + 2)), buildDoGPyramidComputer(nOctaveLayers, gpyr, dogpyr)); +} // Computes a gradient orientation histogram at a specified pixel static float calcOrientationHist( const Mat& img, Point pt, int radius, @@ -321,10 +345,8 @@ static float calcOrientationHist( const Mat& img, Point pt, int radius, { __m256i __bin = _mm256_cvtps_epi32(_mm256_round_ps(_mm256_mul_ps(__nd360, _mm256_loadu_ps(&Ori[k])), _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)); - __bin = _mm256_sub_epi32(__bin, - _mm256_and_si256(__n, _mm256_or_si256(_mm256_cmpeq_epi32(__bin, __n), _mm256_cmpgt_epi32(__bin, __n)))); - __bin = _mm256_add_epi32(__bin, - _mm256_and_si256(__n, _mm256_cmpgt_epi32(_mm256_setzero_si256(), __bin))); + __bin = _mm256_sub_epi32(__bin, _mm256_andnot_si256(_mm256_cmpgt_epi32(__n, __bin), __n)); + __bin = _mm256_add_epi32(__bin, _mm256_and_si256(__n, _mm256_cmpgt_epi32(_mm256_setzero_si256(), __bin))); __m256 __w_mul_mag = _mm256_mul_ps(_mm256_loadu_ps(&W[k]), _mm256_loadu_ps(&Mag[k])); @@ -664,9 +686,9 @@ static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float sc { int CV_DECL_ALIGNED(32) idx_buf[8]; float CV_DECL_ALIGNED(32) rco_buf[64]; - __m256 __ori = _mm256_set1_ps(ori); - __m256 __bins_per_rad = _mm256_set1_ps(bins_per_rad); - __m256i __n = _mm256_set1_epi32(n); + const __m256 __ori = _mm256_set1_ps(ori); + const __m256 __bins_per_rad = _mm256_set1_ps(bins_per_rad); + const __m256i __n = _mm256_set1_epi32(n); for( ; k <= len - 8; k+=8 ) { __m256 __rbin = _mm256_loadu_ps(&RBin[k]); @@ -682,10 +704,8 @@ static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float sc __obin = _mm256_sub_ps(__obin, __o0); __m256i __o0i = _mm256_cvtps_epi32(__o0); - // _o0 += (o0 < 0) * n __o0i = _mm256_add_epi32(__o0i, _mm256_and_si256(__n, _mm256_cmpgt_epi32(_mm256_setzero_si256(), __o0i))); - __o0i = _mm256_sub_epi32(__o0i, - _mm256_and_si256(__n, _mm256_or_si256(_mm256_cmpeq_epi32(__o0i, __n), _mm256_cmpgt_epi32(__o0i, __n)))); + __o0i = _mm256_sub_epi32(__o0i, _mm256_andnot_si256(_mm256_cmpgt_epi32(__n, __o0i), __n)); __m256 __v_r1 = _mm256_mul_ps(__mag, __rbin); __m256 __v_r0 = _mm256_sub_ps(__mag, __v_r1); @@ -900,27 +920,56 @@ static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float sc #endif } -static void calcDescriptors(const std::vector& gpyr, const std::vector& keypoints, - Mat& descriptors, int nOctaveLayers, int firstOctave ) +class calcDescriptorsComputer : public ParallelLoopBody { - int d = SIFT_DESCR_WIDTH, n = SIFT_DESCR_HIST_BINS; - - for( size_t i = 0; i < keypoints.size(); i++ ) +public: + calcDescriptorsComputer(const std::vector& _gpyr, + const std::vector& _keypoints, + Mat& _descriptors, + int _nOctaveLayers, + int _firstOctave) + : gpyr(_gpyr), + keypoints(_keypoints), + descriptors(_descriptors), + nOctaveLayers(_nOctaveLayers), + firstOctave(_firstOctave) { } + + void operator()( const cv::Range& range ) const { - KeyPoint kpt = keypoints[i]; - int octave, layer; - float scale; - unpackOctave(kpt, octave, layer, scale); - CV_Assert(octave >= firstOctave && layer <= nOctaveLayers+2); - float size=kpt.size*scale; - Point2f ptf(kpt.pt.x*scale, kpt.pt.y*scale); - const Mat& img = gpyr[(octave - firstOctave)*(nOctaveLayers + 3) + layer]; - - float angle = 360.f - kpt.angle; - if(std::abs(angle - 360.f) < FLT_EPSILON) - angle = 0.f; - calcSIFTDescriptor(img, ptf, angle, size*0.5f, d, n, descriptors.ptr((int)i)); + const int begin = range.start; + const int end = range.end; + + static const int d = SIFT_DESCR_WIDTH, n = SIFT_DESCR_HIST_BINS; + + for ( int i = begin; i= firstOctave && layer <= nOctaveLayers+2); + float size=kpt.size*scale; + Point2f ptf(kpt.pt.x*scale, kpt.pt.y*scale); + const Mat& img = gpyr[(octave - firstOctave)*(nOctaveLayers + 3) + layer]; + + float angle = 360.f - kpt.angle; + if(std::abs(angle - 360.f) < FLT_EPSILON) + angle = 0.f; + calcSIFTDescriptor(img, ptf, angle, size*0.5f, d, n, descriptors.ptr((int)i)); + } } +private: + const std::vector& gpyr; + const std::vector& keypoints; + Mat& descriptors; + int nOctaveLayers; + int firstOctave; +}; + +static void calcDescriptors(const std::vector& gpyr, const std::vector& keypoints, + Mat& descriptors, int nOctaveLayers, int firstOctave ) +{ + parallel_for_(Range(0, static_cast(keypoints.size())), calcDescriptorsComputer(gpyr, keypoints, descriptors, nOctaveLayers, firstOctave)); } ////////////////////////////////////////////////////////////////////////////////////////// From b6d636214e69d408c77f51f061638665e1ebacda Mon Sep 17 00:00:00 2001 From: Woody Chow Date: Wed, 31 May 2017 10:02:09 +0900 Subject: [PATCH 10/20] [move sift.cpp] Multithreading findScaleSpaceExtremaComputer. Sort the keypoints afterwards to make the output stable original commit: https://github.com/opencv/opencv_contrib/commit/6be2945abb0dcff2e038b5f23c7c7716da69ede9 --- modules/features2d/src/sift.cpp | 208 +++++++++++++++++++++----------- 1 file changed, 138 insertions(+), 70 deletions(-) diff --git a/modules/features2d/src/sift.cpp b/modules/features2d/src/sift.cpp index 50ea4dcc66..bbc8725e36 100644 --- a/modules/features2d/src/sift.cpp +++ b/modules/features2d/src/sift.cpp @@ -534,85 +534,109 @@ static bool adjustLocalExtrema( const std::vector& dog_pyr, KeyPoint& kpt, } -// -// Detects features at extrema in DoG scale space. Bad features are discarded -// based on contrast and ratio of principal curvatures. -void SIFT_Impl::findScaleSpaceExtrema( const std::vector& gauss_pyr, const std::vector& dog_pyr, - std::vector& keypoints ) const +class findScaleSpaceExtremaComputer : public ParallelLoopBody { - int nOctaves = (int)gauss_pyr.size()/(nOctaveLayers + 3); - int threshold = cvFloor(0.5 * contrastThreshold / nOctaveLayers * 255 * SIFT_FIXPT_SCALE); - const int n = SIFT_ORI_HIST_BINS; - float hist[n]; - KeyPoint kpt; +public: + findScaleSpaceExtremaComputer( + int _o, + int _i, + int _threshold, + int _idx, + int _step, + int _cols, + int _nOctaveLayers, + double _contrastThreshold, + double _edgeThreshold, + double _sigma, + const std::vector& _gauss_pyr, + const std::vector& _dog_pyr, + std::vector& _keypoints, + Mutex &_mutex) + + : o(_o), + i(_i), + threshold(_threshold), + idx(_idx), + step(_step), + cols(_cols), + nOctaveLayers(_nOctaveLayers), + contrastThreshold(_contrastThreshold), + edgeThreshold(_edgeThreshold), + sigma(_sigma), + gauss_pyr(_gauss_pyr), + dog_pyr(_dog_pyr), + keypoints(_keypoints), + mutex(_mutex) { } + void operator()( const cv::Range& range ) const + { + const int begin = range.start; + const int end = range.end; - keypoints.clear(); + static const int n = SIFT_ORI_HIST_BINS; + float hist[n]; - for( int o = 0; o < nOctaves; o++ ) - for( int i = 1; i <= nOctaveLayers; i++ ) + const Mat& img = dog_pyr[idx]; + const Mat& prev = dog_pyr[idx-1]; + const Mat& next = dog_pyr[idx+1]; + + KeyPoint kpt; + for( int r = begin; r < end; r++) { - int idx = o*(nOctaveLayers+2)+i; - const Mat& img = dog_pyr[idx]; - const Mat& prev = dog_pyr[idx-1]; - const Mat& next = dog_pyr[idx+1]; - int step = (int)img.step1(); - int rows = img.rows, cols = img.cols; + const sift_wt* currptr = img.ptr(r); + const sift_wt* prevptr = prev.ptr(r); + const sift_wt* nextptr = next.ptr(r); - for( int r = SIFT_IMG_BORDER; r < rows-SIFT_IMG_BORDER; r++) + for( int c = SIFT_IMG_BORDER; c < cols-SIFT_IMG_BORDER; c++) { - const sift_wt* currptr = img.ptr(r); - const sift_wt* prevptr = prev.ptr(r); - const sift_wt* nextptr = next.ptr(r); - - for( int c = SIFT_IMG_BORDER; c < cols-SIFT_IMG_BORDER; c++) + sift_wt val = currptr[c]; + + // find local extrema with pixel accuracy + if( std::abs(val) > threshold && + ((val > 0 && val >= currptr[c-1] && val >= currptr[c+1] && + val >= currptr[c-step-1] && val >= currptr[c-step] && val >= currptr[c-step+1] && + val >= currptr[c+step-1] && val >= currptr[c+step] && val >= currptr[c+step+1] && + val >= nextptr[c] && val >= nextptr[c-1] && val >= nextptr[c+1] && + val >= nextptr[c-step-1] && val >= nextptr[c-step] && val >= nextptr[c-step+1] && + val >= nextptr[c+step-1] && val >= nextptr[c+step] && val >= nextptr[c+step+1] && + val >= prevptr[c] && val >= prevptr[c-1] && val >= prevptr[c+1] && + val >= prevptr[c-step-1] && val >= prevptr[c-step] && val >= prevptr[c-step+1] && + val >= prevptr[c+step-1] && val >= prevptr[c+step] && val >= prevptr[c+step+1]) || + (val < 0 && val <= currptr[c-1] && val <= currptr[c+1] && + val <= currptr[c-step-1] && val <= currptr[c-step] && val <= currptr[c-step+1] && + val <= currptr[c+step-1] && val <= currptr[c+step] && val <= currptr[c+step+1] && + val <= nextptr[c] && val <= nextptr[c-1] && val <= nextptr[c+1] && + val <= nextptr[c-step-1] && val <= nextptr[c-step] && val <= nextptr[c-step+1] && + val <= nextptr[c+step-1] && val <= nextptr[c+step] && val <= nextptr[c+step+1] && + val <= prevptr[c] && val <= prevptr[c-1] && val <= prevptr[c+1] && + val <= prevptr[c-step-1] && val <= prevptr[c-step] && val <= prevptr[c-step+1] && + val <= prevptr[c+step-1] && val <= prevptr[c+step] && val <= prevptr[c+step+1]))) { - sift_wt val = currptr[c]; - - // find local extrema with pixel accuracy - if( std::abs(val) > threshold && - ((val > 0 && val >= currptr[c-1] && val >= currptr[c+1] && - val >= currptr[c-step-1] && val >= currptr[c-step] && val >= currptr[c-step+1] && - val >= currptr[c+step-1] && val >= currptr[c+step] && val >= currptr[c+step+1] && - val >= nextptr[c] && val >= nextptr[c-1] && val >= nextptr[c+1] && - val >= nextptr[c-step-1] && val >= nextptr[c-step] && val >= nextptr[c-step+1] && - val >= nextptr[c+step-1] && val >= nextptr[c+step] && val >= nextptr[c+step+1] && - val >= prevptr[c] && val >= prevptr[c-1] && val >= prevptr[c+1] && - val >= prevptr[c-step-1] && val >= prevptr[c-step] && val >= prevptr[c-step+1] && - val >= prevptr[c+step-1] && val >= prevptr[c+step] && val >= prevptr[c+step+1]) || - (val < 0 && val <= currptr[c-1] && val <= currptr[c+1] && - val <= currptr[c-step-1] && val <= currptr[c-step] && val <= currptr[c-step+1] && - val <= currptr[c+step-1] && val <= currptr[c+step] && val <= currptr[c+step+1] && - val <= nextptr[c] && val <= nextptr[c-1] && val <= nextptr[c+1] && - val <= nextptr[c-step-1] && val <= nextptr[c-step] && val <= nextptr[c-step+1] && - val <= nextptr[c+step-1] && val <= nextptr[c+step] && val <= nextptr[c+step+1] && - val <= prevptr[c] && val <= prevptr[c-1] && val <= prevptr[c+1] && - val <= prevptr[c-step-1] && val <= prevptr[c-step] && val <= prevptr[c-step+1] && - val <= prevptr[c+step-1] && val <= prevptr[c+step] && val <= prevptr[c+step+1]))) + int r1 = r, c1 = c, layer = i; + if( !adjustLocalExtrema(dog_pyr, kpt, o, layer, r1, c1, + nOctaveLayers, (float)contrastThreshold, + (float)edgeThreshold, (float)sigma) ) + continue; + float scl_octv = kpt.size*0.5f/(1 << o); + float omax = calcOrientationHist(gauss_pyr[o*(nOctaveLayers+3) + layer], + Point(c1, r1), + cvRound(SIFT_ORI_RADIUS * scl_octv), + SIFT_ORI_SIG_FCTR * scl_octv, + hist, n); + float mag_thr = (float)(omax * SIFT_ORI_PEAK_RATIO); + for( int j = 0; j < n; j++ ) { - int r1 = r, c1 = c, layer = i; - if( !adjustLocalExtrema(dog_pyr, kpt, o, layer, r1, c1, - nOctaveLayers, (float)contrastThreshold, - (float)edgeThreshold, (float)sigma) ) - continue; - float scl_octv = kpt.size*0.5f/(1 << o); - float omax = calcOrientationHist(gauss_pyr[o*(nOctaveLayers+3) + layer], - Point(c1, r1), - cvRound(SIFT_ORI_RADIUS * scl_octv), - SIFT_ORI_SIG_FCTR * scl_octv, - hist, n); - float mag_thr = (float)(omax * SIFT_ORI_PEAK_RATIO); - for( int j = 0; j < n; j++ ) - { - int l = j > 0 ? j - 1 : n - 1; - int r2 = j < n-1 ? j + 1 : 0; + int l = j > 0 ? j - 1 : n - 1; + int r2 = j < n-1 ? j + 1 : 0; - if( hist[j] > hist[l] && hist[j] > hist[r2] && hist[j] >= mag_thr ) + if( hist[j] > hist[l] && hist[j] > hist[r2] && hist[j] >= mag_thr ) + { + float bin = j + 0.5f * (hist[l]-hist[r2]) / (hist[l] - 2*hist[j] + hist[r2]); + bin = bin < 0 ? n + bin : bin >= n ? bin - n : bin; + kpt.angle = 360.f - (float)((360.f/n) * bin); + if(std::abs(kpt.angle - 360.f) < FLT_EPSILON) + kpt.angle = 0.f; { - float bin = j + 0.5f * (hist[l]-hist[r2]) / (hist[l] - 2*hist[j] + hist[r2]); - bin = bin < 0 ? n + bin : bin >= n ? bin - n : bin; - kpt.angle = 360.f - (float)((360.f/n) * bin); - if(std::abs(kpt.angle - 360.f) < FLT_EPSILON) - kpt.angle = 0.f; + AutoLock autoLock(mutex); keypoints.push_back(kpt); } } @@ -620,6 +644,50 @@ void SIFT_Impl::findScaleSpaceExtrema( const std::vector& gauss_pyr, const } } } + } +private: + int o, i; + int threshold; + int idx, step, cols; + int nOctaveLayers; + double contrastThreshold; + double edgeThreshold; + double sigma; + const std::vector& gauss_pyr; + const std::vector& dog_pyr; + std::vector& keypoints; + Mutex &mutex; +}; + +// +// Detects features at extrema in DoG scale space. Bad features are discarded +// based on contrast and ratio of principal curvatures. +void SIFT_Impl::findScaleSpaceExtrema( const std::vector& gauss_pyr, const std::vector& dog_pyr, + std::vector& keypoints ) const +{ + const int nOctaves = (int)gauss_pyr.size()/(nOctaveLayers + 3); + const int threshold = cvFloor(0.5 * contrastThreshold / nOctaveLayers * 255 * SIFT_FIXPT_SCALE); + + keypoints.clear(); + Mutex mutex; + + for( int o = 0; o < nOctaves; o++ ) + for( int i = 1; i <= nOctaveLayers; i++ ) + { + const int idx = o*(nOctaveLayers+2)+i; + const Mat& img = dog_pyr[idx]; + const int step = (int)img.step1(); + const int rows = img.rows, cols = img.cols; + + parallel_for_(Range(SIFT_IMG_BORDER, rows-SIFT_IMG_BORDER), + findScaleSpaceExtremaComputer( + o, i, threshold, idx, step, cols, + nOctaveLayers, + contrastThreshold, + edgeThreshold, + sigma, + gauss_pyr, dog_pyr, keypoints, mutex)); + } } @@ -1046,7 +1114,7 @@ void SIFT_Impl::detectAndCompute(InputArray _image, InputArray _mask, { //t = (double)getTickCount(); findScaleSpaceExtrema(gpyr, dogpyr, keypoints); - KeyPointsFilter::removeDuplicated( keypoints ); + KeyPointsFilter::removeDuplicatedSorted( keypoints ); if( nfeatures > 0 ) KeyPointsFilter::retainBest(keypoints, nfeatures); From 0f0dea79fc812eecac9865ab575388df84b51189 Mon Sep 17 00:00:00 2001 From: Woody Chow Date: Wed, 31 May 2017 15:08:32 +0900 Subject: [PATCH 11/20] [move sift.cpp] Use TLS instead of mutex in SIFT original commit: https://github.com/opencv/opencv_contrib/commit/ab43a3b2d99c490be8635987923e7571fd95a0c9 --- modules/features2d/src/sift.cpp | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/modules/features2d/src/sift.cpp b/modules/features2d/src/sift.cpp index bbc8725e36..7fd9a2603c 100644 --- a/modules/features2d/src/sift.cpp +++ b/modules/features2d/src/sift.cpp @@ -550,8 +550,7 @@ public: double _sigma, const std::vector& _gauss_pyr, const std::vector& _dog_pyr, - std::vector& _keypoints, - Mutex &_mutex) + TLSData > &_tls_kpts_struct) : o(_o), i(_i), @@ -565,8 +564,7 @@ public: sigma(_sigma), gauss_pyr(_gauss_pyr), dog_pyr(_dog_pyr), - keypoints(_keypoints), - mutex(_mutex) { } + tls_kpts_struct(_tls_kpts_struct) { } void operator()( const cv::Range& range ) const { const int begin = range.start; @@ -579,6 +577,8 @@ public: const Mat& prev = dog_pyr[idx-1]; const Mat& next = dog_pyr[idx+1]; + std::vector *tls_kpts = tls_kpts_struct.get(); + KeyPoint kpt; for( int r = begin; r < end; r++) { @@ -636,8 +636,7 @@ public: if(std::abs(kpt.angle - 360.f) < FLT_EPSILON) kpt.angle = 0.f; { - AutoLock autoLock(mutex); - keypoints.push_back(kpt); + tls_kpts->push_back(kpt); } } } @@ -655,8 +654,7 @@ private: double sigma; const std::vector& gauss_pyr; const std::vector& dog_pyr; - std::vector& keypoints; - Mutex &mutex; + TLSData > &tls_kpts_struct; }; // @@ -669,7 +667,7 @@ void SIFT_Impl::findScaleSpaceExtrema( const std::vector& gauss_pyr, const const int threshold = cvFloor(0.5 * contrastThreshold / nOctaveLayers * 255 * SIFT_FIXPT_SCALE); keypoints.clear(); - Mutex mutex; + TLSData > tls_kpts_struct; for( int o = 0; o < nOctaves; o++ ) for( int i = 1; i <= nOctaveLayers; i++ ) @@ -686,8 +684,14 @@ void SIFT_Impl::findScaleSpaceExtrema( const std::vector& gauss_pyr, const contrastThreshold, edgeThreshold, sigma, - gauss_pyr, dog_pyr, keypoints, mutex)); + gauss_pyr, dog_pyr, tls_kpts_struct)); } + + std::vector*> kpt_vecs; + tls_kpts_struct.gather(kpt_vecs); + for (size_t i = 0; i < kpt_vecs.size(); ++i) { + keypoints.insert(keypoints.end(), kpt_vecs[i]->begin(), kpt_vecs[i]->end()); + } } From 69d43e2997023c476ac332dfb52810d69be3a94b Mon Sep 17 00:00:00 2001 From: "woody.chow" Date: Tue, 26 Sep 2017 10:12:30 +0900 Subject: [PATCH 12/20] [move sift.cpp] Remove unnecessary _mm256_round_ps original commit: https://github.com/opencv/opencv_contrib/commit/b5340f6428b6d48445947b16ef2df04552451330 --- modules/features2d/src/sift.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/features2d/src/sift.cpp b/modules/features2d/src/sift.cpp index 7fd9a2603c..30a445d66e 100644 --- a/modules/features2d/src/sift.cpp +++ b/modules/features2d/src/sift.cpp @@ -343,7 +343,7 @@ static float calcOrientationHist( const Mat& img, Point pt, int radius, float CV_DECL_ALIGNED(32) w_mul_mag_buf[8]; for ( ; k <= len - 8; k+=8 ) { - __m256i __bin = _mm256_cvtps_epi32(_mm256_round_ps(_mm256_mul_ps(__nd360, _mm256_loadu_ps(&Ori[k])), _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)); + __m256i __bin = _mm256_cvtps_epi32(_mm256_mul_ps(__nd360, _mm256_loadu_ps(&Ori[k]))); __bin = _mm256_sub_epi32(__bin, _mm256_andnot_si256(_mm256_cmpgt_epi32(__n, __bin), __n)); __bin = _mm256_add_epi32(__bin, _mm256_and_si256(__n, _mm256_cmpgt_epi32(_mm256_setzero_si256(), __bin))); From c5f06814bca656354730d5ad0c4b43afab595f94 Mon Sep 17 00:00:00 2001 From: Vitaly Tuzov Date: Fri, 1 Dec 2017 13:10:59 +0300 Subject: [PATCH 13/20] [move sift.cpp] Updated internal calls to linear resize to use bit-exact version original commit: https://github.com/opencv/opencv_contrib/commit/8c394a4f2e02994f21c3cf88706ad62d65b53431 --- modules/features2d/src/sift.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/modules/features2d/src/sift.cpp b/modules/features2d/src/sift.cpp index 30a445d66e..b17808b8e1 100644 --- a/modules/features2d/src/sift.cpp +++ b/modules/features2d/src/sift.cpp @@ -162,7 +162,8 @@ static const float SIFT_DESCR_MAG_THR = 0.2f; // factor used to convert floating-point descriptor to unsigned char static const float SIFT_INT_DESCR_FCTR = 512.f; -#if 0 +#define DoG_TYPE_SHORT 0 +#if DoG_TYPE_SHORT // intermediate type used for DoG pyramids typedef short sift_wt; static const int SIFT_FIXPT_SCALE = 48; @@ -198,7 +199,11 @@ static Mat createInitialImage( const Mat& img, bool doubleImageSize, float sigma { sig_diff = sqrtf( std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA * 4, 0.01f) ); Mat dbl; +#if DoG_TYPE_SHORT + resize(gray_fpt, dbl, Size(gray_fpt.cols*2, gray_fpt.rows*2), 0, 0, INTER_LINEAR_EXACT); +#else resize(gray_fpt, dbl, Size(gray_fpt.cols*2, gray_fpt.rows*2), 0, 0, INTER_LINEAR); +#endif GaussianBlur(dbl, dbl, Size(), sig_diff, sig_diff); return dbl; } From 9badb0d9036fb069fea2eecc0d510b7b425d26d5 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 21 Mar 2018 16:58:22 +0300 Subject: [PATCH 14/20] [move sift.cpp] xfeatures2d: apply CV_OVERRIDE/CV_FINAL original commit: https://github.com/opencv/opencv_contrib/commit/ec65e5b29c1c4928aee3d56a44bb399d4af6b61a --- modules/features2d/src/sift.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/features2d/src/sift.cpp b/modules/features2d/src/sift.cpp index b17808b8e1..c57d55f840 100644 --- a/modules/features2d/src/sift.cpp +++ b/modules/features2d/src/sift.cpp @@ -90,20 +90,20 @@ public: double sigma = 1.6); //! returns the descriptor size in floats (128) - int descriptorSize() const; + int descriptorSize() const CV_OVERRIDE; //! returns the descriptor type - int descriptorType() const; + int descriptorType() const CV_OVERRIDE; //! returns the default norm type - int defaultNorm() const; + int defaultNorm() const CV_OVERRIDE; //! finds the keypoints and computes descriptors for them using SIFT algorithm. //! Optionally it can compute descriptors for the user-provided keypoints void detectAndCompute(InputArray img, InputArray mask, std::vector& keypoints, OutputArray descriptors, - bool useProvidedKeypoints = false); + bool useProvidedKeypoints = false) CV_OVERRIDE; void buildGaussianPyramid( const Mat& base, std::vector& pyr, int nOctaves ) const; void buildDoGPyramid( const std::vector& pyr, std::vector& dogpyr ) const; @@ -267,7 +267,7 @@ public: gpyr(_gpyr), dogpyr(_dogpyr) { } - void operator()( const cv::Range& range ) const + void operator()( const cv::Range& range ) const CV_OVERRIDE { const int begin = range.start; const int end = range.end; @@ -570,7 +570,7 @@ public: gauss_pyr(_gauss_pyr), dog_pyr(_dog_pyr), tls_kpts_struct(_tls_kpts_struct) { } - void operator()( const cv::Range& range ) const + void operator()( const cv::Range& range ) const CV_OVERRIDE { const int begin = range.start; const int end = range.end; @@ -1011,7 +1011,7 @@ public: nOctaveLayers(_nOctaveLayers), firstOctave(_firstOctave) { } - void operator()( const cv::Range& range ) const + void operator()( const cv::Range& range ) const CV_OVERRIDE { const int begin = range.start; const int end = range.end; From fd46684bf874f659e5f516f5fc4a651cc887f8a2 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sun, 10 Jun 2018 22:53:13 +0000 Subject: [PATCH 15/20] [move sift.cpp] opencv: use cv::AutoBuffer<>::data() original commit: https://github.com/opencv/opencv_contrib/commit/fc69aa57bc021422d825a4331f7ddf7d9f7534e7 --- modules/features2d/src/sift.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/features2d/src/sift.cpp b/modules/features2d/src/sift.cpp index c57d55f840..15334b7735 100644 --- a/modules/features2d/src/sift.cpp +++ b/modules/features2d/src/sift.cpp @@ -306,7 +306,7 @@ static float calcOrientationHist( const Mat& img, Point pt, int radius, float expf_scale = -1.f/(2.f * sigma * sigma); AutoBuffer buf(len*4 + n+4); - float *X = buf, *Y = X + len, *Mag = X, *Ori = Y + len, *W = Ori + len; + float *X = buf.data(), *Y = X + len, *Mag = X, *Ori = Y + len, *W = Ori + len; float* temphist = W + len + 2; for( i = 0; i < n; i++ ) @@ -719,7 +719,7 @@ static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float sc int rows = img.rows, cols = img.cols; AutoBuffer buf(len*6 + histlen); - float *X = buf, *Y = X + len, *Mag = Y, *Ori = Mag + len, *W = Ori + len; + float *X = buf.data(), *Y = X + len, *Mag = Y, *Ori = Mag + len, *W = Ori + len; float *RBin = W + len, *CBin = RBin + len, *hist = CBin + len; for( i = 0; i < d+2; i++ ) From fadb90c579acf292cf35dfcf67ab10139bb73fb2 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 17 Oct 2019 00:44:20 +0000 Subject: [PATCH 16/20] [move sift.cpp] xfeatures2d: use updated TLS API original commit: https://github.com/opencv/opencv_contrib/commit/3e4fb8f415ba68c56ba2cded1ab10a75c46d342e --- modules/features2d/src/sift.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/features2d/src/sift.cpp b/modules/features2d/src/sift.cpp index 15334b7735..8a13de7118 100644 --- a/modules/features2d/src/sift.cpp +++ b/modules/features2d/src/sift.cpp @@ -74,6 +74,8 @@ #include #include +#include + namespace cv { @@ -672,7 +674,7 @@ void SIFT_Impl::findScaleSpaceExtrema( const std::vector& gauss_pyr, const const int threshold = cvFloor(0.5 * contrastThreshold / nOctaveLayers * 255 * SIFT_FIXPT_SCALE); keypoints.clear(); - TLSData > tls_kpts_struct; + TLSDataAccumulator > tls_kpts_struct; for( int o = 0; o < nOctaves; o++ ) for( int i = 1; i <= nOctaveLayers; i++ ) From 29478778719037c82962a7f63890a4bda8bd57ed Mon Sep 17 00:00:00 2001 From: ab-dragon Date: Fri, 1 Nov 2019 18:28:18 +0000 Subject: [PATCH 17/20] [move sift.cpp] Merge pull request opencv/opencv_contrib#2301 from ab-dragon:conditionally_compute_dog_pyramid Build DoG Pyramid if useProvideKeypoints is false The buildDoGPyramid operation need not be performed unconditionally. In cases where it is not needed, both memory and speed performance can be improved original commit: https://github.com/opencv/opencv_contrib/commit/e45887e1c097db6e5f75dc70d7723203bcafa5f1 --- modules/features2d/src/sift.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/features2d/src/sift.cpp b/modules/features2d/src/sift.cpp index 8a13de7118..9e39fde879 100644 --- a/modules/features2d/src/sift.cpp +++ b/modules/features2d/src/sift.cpp @@ -1110,19 +1110,20 @@ void SIFT_Impl::detectAndCompute(InputArray _image, InputArray _mask, } Mat base = createInitialImage(image, firstOctave < 0, (float)sigma); - std::vector gpyr, dogpyr; + std::vector gpyr; int nOctaves = actualNOctaves > 0 ? actualNOctaves : cvRound(std::log( (double)std::min( base.cols, base.rows ) ) / std::log(2.) - 2) - firstOctave; //double t, tf = getTickFrequency(); //t = (double)getTickCount(); buildGaussianPyramid(base, gpyr, nOctaves); - buildDoGPyramid(gpyr, dogpyr); //t = (double)getTickCount() - t; //printf("pyramid construction time: %g\n", t*1000./tf); if( !useProvidedKeypoints ) { + std::vector dogpyr; + buildDoGPyramid(gpyr, dogpyr); //t = (double)getTickCount(); findScaleSpaceExtrema(gpyr, dogpyr, keypoints); KeyPointsFilter::removeDuplicatedSorted( keypoints ); From ef5fa498d46a2e58bf9022885e31ae6cc42e9e5b Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 16 Apr 2020 05:33:54 +0000 Subject: [PATCH 18/20] [move sift.cpp] sift: perf tests and trace regions original commit: https://github.com/opencv/opencv_contrib/commit/a15e105db12aa7a0bade47afb791682201e46f60 --- modules/features2d/perf/perf_sift.cpp | 72 +++++++++++++++++++++++++++ modules/features2d/src/sift.cpp | 26 ++++++++++ 2 files changed, 98 insertions(+) create mode 100644 modules/features2d/perf/perf_sift.cpp diff --git a/modules/features2d/perf/perf_sift.cpp b/modules/features2d/perf/perf_sift.cpp new file mode 100644 index 0000000000..156ebfe8b8 --- /dev/null +++ b/modules/features2d/perf/perf_sift.cpp @@ -0,0 +1,72 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +#include "perf_precomp.hpp" + +namespace opencv_test { namespace { + +typedef perf::TestBaseWithParam sift; + +#define SIFT_IMAGES \ + "cv/detectors_descriptors_evaluation/images_datasets/leuven/img1.png",\ + "stitching/a3.png" + +PERF_TEST_P(sift, detect, testing::Values(SIFT_IMAGES)) +{ + string filename = getDataPath(GetParam()); + Mat frame = imread(filename, IMREAD_GRAYSCALE); + ASSERT_FALSE(frame.empty()) << "Unable to load source image " << filename; + + Mat mask; + declare.in(frame).time(90); + Ptr detector = SIFT::create(); + vector points; + + PERF_SAMPLE_BEGIN(); + detector->detect(frame, points, mask); + PERF_SAMPLE_END(); + + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(sift, extract, testing::Values(SIFT_IMAGES)) +{ + string filename = getDataPath(GetParam()); + Mat frame = imread(filename, IMREAD_GRAYSCALE); + ASSERT_FALSE(frame.empty()) << "Unable to load source image " << filename; + + Mat mask; + declare.in(frame).time(90); + + Ptr detector = SIFT::create(); + vector points; + Mat descriptors; + detector->detect(frame, points, mask); + + PERF_SAMPLE_BEGIN(); + detector->compute(frame, points, descriptors); + PERF_SAMPLE_END(); + + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(sift, full, testing::Values(SIFT_IMAGES)) +{ + string filename = getDataPath(GetParam()); + Mat frame = imread(filename, IMREAD_GRAYSCALE); + ASSERT_FALSE(frame.empty()) << "Unable to load source image " << filename; + + Mat mask; + declare.in(frame).time(90); + Ptr detector = SIFT::create(); + vector points; + Mat descriptors; + + PERF_SAMPLE_BEGIN(); + detector->detectAndCompute(frame, mask, points, descriptors, false); + PERF_SAMPLE_END(); + + SANITY_CHECK_NOTHING(); +} + +}} // namespace diff --git a/modules/features2d/src/sift.cpp b/modules/features2d/src/sift.cpp index 9e39fde879..e8950e6f08 100644 --- a/modules/features2d/src/sift.cpp +++ b/modules/features2d/src/sift.cpp @@ -123,6 +123,7 @@ protected: Ptr SIFT::create( int _nfeatures, int _nOctaveLayers, double _contrastThreshold, double _edgeThreshold, double _sigma ) { + CV_TRACE_FUNCTION(); return makePtr(_nfeatures, _nOctaveLayers, _contrastThreshold, _edgeThreshold, _sigma); } @@ -186,6 +187,8 @@ unpackOctave(const KeyPoint& kpt, int& octave, int& layer, float& scale) static Mat createInitialImage( const Mat& img, bool doubleImageSize, float sigma ) { + CV_TRACE_FUNCTION(); + Mat gray, gray_fpt; if( img.channels() == 3 || img.channels() == 4 ) { @@ -220,6 +223,8 @@ static Mat createInitialImage( const Mat& img, bool doubleImageSize, float sigma void SIFT_Impl::buildGaussianPyramid( const Mat& base, std::vector& pyr, int nOctaves ) const { + CV_TRACE_FUNCTION(); + std::vector sig(nOctaveLayers + 3); pyr.resize(nOctaves*(nOctaveLayers + 3)); @@ -271,6 +276,8 @@ public: void operator()( const cv::Range& range ) const CV_OVERRIDE { + CV_TRACE_FUNCTION(); + const int begin = range.start; const int end = range.end; @@ -294,6 +301,8 @@ private: void SIFT_Impl::buildDoGPyramid( const std::vector& gpyr, std::vector& dogpyr ) const { + CV_TRACE_FUNCTION(); + int nOctaves = (int)gpyr.size()/(nOctaveLayers + 3); dogpyr.resize( nOctaves*(nOctaveLayers + 2) ); @@ -304,6 +313,8 @@ void SIFT_Impl::buildDoGPyramid( const std::vector& gpyr, std::vector& static float calcOrientationHist( const Mat& img, Point pt, int radius, float sigma, float* hist, int n ) { + CV_TRACE_FUNCTION(); + int i, j, k, len = (radius*2+1)*(radius*2+1); float expf_scale = -1.f/(2.f * sigma * sigma); @@ -442,6 +453,8 @@ static bool adjustLocalExtrema( const std::vector& dog_pyr, KeyPoint& kpt, int& layer, int& r, int& c, int nOctaveLayers, float contrastThreshold, float edgeThreshold, float sigma ) { + CV_TRACE_FUNCTION(); + const float img_scale = 1.f/(255*SIFT_FIXPT_SCALE); const float deriv_scale = img_scale*0.5f; const float second_deriv_scale = img_scale; @@ -574,6 +587,8 @@ public: tls_kpts_struct(_tls_kpts_struct) { } void operator()( const cv::Range& range ) const CV_OVERRIDE { + CV_TRACE_FUNCTION(); + const int begin = range.start; const int end = range.end; @@ -618,6 +633,8 @@ public: val <= prevptr[c-step-1] && val <= prevptr[c-step] && val <= prevptr[c-step+1] && val <= prevptr[c+step-1] && val <= prevptr[c+step] && val <= prevptr[c+step+1]))) { + CV_TRACE_REGION("pixel_candidate"); + int r1 = r, c1 = c, layer = i; if( !adjustLocalExtrema(dog_pyr, kpt, o, layer, r1, c1, nOctaveLayers, (float)contrastThreshold, @@ -670,6 +687,8 @@ private: void SIFT_Impl::findScaleSpaceExtrema( const std::vector& gauss_pyr, const std::vector& dog_pyr, std::vector& keypoints ) const { + CV_TRACE_FUNCTION(); + const int nOctaves = (int)gauss_pyr.size()/(nOctaveLayers + 3); const int threshold = cvFloor(0.5 * contrastThreshold / nOctaveLayers * 255 * SIFT_FIXPT_SCALE); @@ -705,6 +724,8 @@ void SIFT_Impl::findScaleSpaceExtrema( const std::vector& gauss_pyr, const static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float scl, int d, int n, float* dst ) { + CV_TRACE_FUNCTION(); + Point pt(cvRound(ptf.x), cvRound(ptf.y)); float cos_t = cosf(ori*(float)(CV_PI/180)); float sin_t = sinf(ori*(float)(CV_PI/180)); @@ -1015,6 +1036,8 @@ public: void operator()( const cv::Range& range ) const CV_OVERRIDE { + CV_TRACE_FUNCTION(); + const int begin = range.start; const int end = range.end; @@ -1048,6 +1071,7 @@ private: static void calcDescriptors(const std::vector& gpyr, const std::vector& keypoints, Mat& descriptors, int nOctaveLayers, int firstOctave ) { + CV_TRACE_FUNCTION(); parallel_for_(Range(0, static_cast(keypoints.size())), calcDescriptorsComputer(gpyr, keypoints, descriptors, nOctaveLayers, firstOctave)); } @@ -1081,6 +1105,8 @@ void SIFT_Impl::detectAndCompute(InputArray _image, InputArray _mask, OutputArray _descriptors, bool useProvidedKeypoints) { + CV_TRACE_FUNCTION(); + int firstOctave = -1, actualNOctaves = 0, actualNLayers = 0; Mat image = _image.getMat(), mask = _mask.getMat(); From ed58b5489fe84340066e5bf636e56fd41a3fe105 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 16 Apr 2020 14:46:38 +0000 Subject: [PATCH 19/20] [move sift.cpp] sift: avoid inplace calls of GaussianBlur - should unlock IPP optimizations original commit: https://github.com/opencv/opencv_contrib/commit/ce7c8f2646ccf3f5e657ab1241e22c0c32cd9d41 --- modules/features2d/src/sift.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/modules/features2d/src/sift.cpp b/modules/features2d/src/sift.cpp index e8950e6f08..74e46c5a72 100644 --- a/modules/features2d/src/sift.cpp +++ b/modules/features2d/src/sift.cpp @@ -209,14 +209,16 @@ static Mat createInitialImage( const Mat& img, bool doubleImageSize, float sigma #else resize(gray_fpt, dbl, Size(gray_fpt.cols*2, gray_fpt.rows*2), 0, 0, INTER_LINEAR); #endif - GaussianBlur(dbl, dbl, Size(), sig_diff, sig_diff); - return dbl; + Mat result; + GaussianBlur(dbl, result, Size(), sig_diff, sig_diff); + return result; } else { sig_diff = sqrtf( std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA, 0.01f) ); - GaussianBlur(gray_fpt, gray_fpt, Size(), sig_diff, sig_diff); - return gray_fpt; + Mat result; + GaussianBlur(gray_fpt, result, Size(), sig_diff, sig_diff); + return result; } } From a3274187671c4b4c2868f2ca8dd3face19f2176f Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 21 Apr 2020 06:58:21 +0000 Subject: [PATCH 20/20] features2d(sift): move SIFT tests / headers / build fixes --- doc/opencv.bib | 2 +- .../py_feature_homography.markdown | 2 +- .../py_matcher/py_matcher.markdown | 4 +- .../py_sift_intro/py_sift_intro.markdown | 4 +- .../feature_flann_matcher.markdown | 2 +- .../features2d/include/opencv2/features2d.hpp | 33 ++++++++++++++++ .../test/SIFTDescriptorExtractorTest.java | 3 +- modules/features2d/perf/perf_sift.cpp | 21 ++++++++-- modules/features2d/src/sift.cpp | 6 --- .../test/test_descriptors_invariance.cpp | 7 ++++ .../test/test_descriptors_regression.cpp | 38 ++++++++++++++++++- .../test/test_detectors_invariance.cpp | 7 ++++ .../test/test_detectors_regression.cpp | 6 +++ modules/features2d/test/test_keypoints.cpp | 7 ++++ modules/stitching/src/matchers.cpp | 7 ---- .../real_time_pose_estimation/src/Utils.cpp | 11 +----- samples/python/find_obj.py | 2 +- 17 files changed, 126 insertions(+), 36 deletions(-) diff --git a/doc/opencv.bib b/doc/opencv.bib index 0135418d4a..901fbe86ee 100644 --- a/doc/opencv.bib +++ b/doc/opencv.bib @@ -620,7 +620,7 @@ volume = {1}, publisher = {IEEE} } -@article{Lowe:2004:DIF:993451.996342, +@article{Lowe04, author = {Lowe, David G.}, title = {Distinctive Image Features from Scale-Invariant Keypoints}, journal = {Int. J. Comput. Vision}, diff --git a/doc/py_tutorials/py_feature2d/py_feature_homography/py_feature_homography.markdown b/doc/py_tutorials/py_feature2d/py_feature_homography/py_feature_homography.markdown index ed76d638f5..8602cc9398 100644 --- a/doc/py_tutorials/py_feature2d/py_feature_homography/py_feature_homography.markdown +++ b/doc/py_tutorials/py_feature2d/py_feature_homography/py_feature_homography.markdown @@ -44,7 +44,7 @@ img1 = cv.imread('box.png',0) # queryImage img2 = cv.imread('box_in_scene.png',0) # trainImage # Initiate SIFT detector -sift = cv.xfeatures2d.SIFT_create() +sift = cv.SIFT_create() # find the keypoints and descriptors with SIFT kp1, des1 = sift.detectAndCompute(img1,None) diff --git a/doc/py_tutorials/py_feature2d/py_matcher/py_matcher.markdown b/doc/py_tutorials/py_feature2d/py_matcher/py_matcher.markdown index d8ba8f856d..aeab98bfd6 100644 --- a/doc/py_tutorials/py_feature2d/py_matcher/py_matcher.markdown +++ b/doc/py_tutorials/py_feature2d/py_matcher/py_matcher.markdown @@ -110,7 +110,7 @@ img1 = cv.imread('box.png',cv.IMREAD_GRAYSCALE) # queryImage img2 = cv.imread('box_in_scene.png',cv.IMREAD_GRAYSCALE) # trainImage # Initiate SIFT detector -sift = cv.xfeatures2d.SIFT_create() +sift = cv.SIFT_create() # find the keypoints and descriptors with SIFT kp1, des1 = sift.detectAndCompute(img1,None) @@ -174,7 +174,7 @@ img1 = cv.imread('box.png',cv.IMREAD_GRAYSCALE) # queryImage img2 = cv.imread('box_in_scene.png',cv.IMREAD_GRAYSCALE) # trainImage # Initiate SIFT detector -sift = cv.xfeatures2d.SIFT_create() +sift = cv.SIFT_create() # find the keypoints and descriptors with SIFT kp1, des1 = sift.detectAndCompute(img1,None) diff --git a/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown b/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown index f9d70938eb..656f5423c5 100644 --- a/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown +++ b/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown @@ -119,7 +119,7 @@ import cv2 as cv img = cv.imread('home.jpg') gray= cv.cvtColor(img,cv.COLOR_BGR2GRAY) -sift = cv.xfeatures2d.SIFT_create() +sift = cv.SIFT_create() kp = sift.detect(gray,None) img=cv.drawKeypoints(gray,kp,img) @@ -151,7 +151,7 @@ Now to calculate the descriptor, OpenCV provides two methods. We will see the second method: @code{.py} -sift = cv.xfeatures2d.SIFT_create() +sift = cv.SIFT_create() kp, des = sift.detectAndCompute(gray,None) @endcode Here kp will be a list of keypoints and des is a numpy array of shape diff --git a/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.markdown b/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.markdown index 4b3f3daddf..47ba7a0460 100644 --- a/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.markdown +++ b/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.markdown @@ -27,7 +27,7 @@ Binary descriptors (ORB, BRISK, ...) are matched using the sift; +typedef perf::TestBaseWithParam SIFT_detect; +typedef perf::TestBaseWithParam SIFT_extract; +typedef perf::TestBaseWithParam SIFT_full; #define SIFT_IMAGES \ "cv/detectors_descriptors_evaluation/images_datasets/leuven/img1.png",\ "stitching/a3.png" -PERF_TEST_P(sift, detect, testing::Values(SIFT_IMAGES)) +PERF_TEST_P_(SIFT_detect, SIFT) { string filename = getDataPath(GetParam()); Mat frame = imread(filename, IMREAD_GRAYSCALE); @@ -29,7 +31,7 @@ PERF_TEST_P(sift, detect, testing::Values(SIFT_IMAGES)) SANITY_CHECK_NOTHING(); } -PERF_TEST_P(sift, extract, testing::Values(SIFT_IMAGES)) +PERF_TEST_P_(SIFT_extract, SIFT) { string filename = getDataPath(GetParam()); Mat frame = imread(filename, IMREAD_GRAYSCALE); @@ -50,7 +52,7 @@ PERF_TEST_P(sift, extract, testing::Values(SIFT_IMAGES)) SANITY_CHECK_NOTHING(); } -PERF_TEST_P(sift, full, testing::Values(SIFT_IMAGES)) +PERF_TEST_P_(SIFT_full, SIFT) { string filename = getDataPath(GetParam()); Mat frame = imread(filename, IMREAD_GRAYSCALE); @@ -69,4 +71,15 @@ PERF_TEST_P(sift, full, testing::Values(SIFT_IMAGES)) SANITY_CHECK_NOTHING(); } + +INSTANTIATE_TEST_CASE_P(/*nothing*/, SIFT_detect, + testing::Values(SIFT_IMAGES) +); +INSTANTIATE_TEST_CASE_P(/*nothing*/, SIFT_extract, + testing::Values(SIFT_IMAGES) +); +INSTANTIATE_TEST_CASE_P(/*nothing*/, SIFT_full, + testing::Values(SIFT_IMAGES) +); + }} // namespace diff --git a/modules/features2d/src/sift.cpp b/modules/features2d/src/sift.cpp index 74e46c5a72..81254ecbd8 100644 --- a/modules/features2d/src/sift.cpp +++ b/modules/features2d/src/sift.cpp @@ -355,7 +355,6 @@ static float calcOrientationHist( const Mat& img, Point pt, int radius, k = 0; #if CV_AVX2 - if( USE_AVX2 ) { __m256 __nd360 = _mm256_set1_ps(n/360.f); __m256i __n = _mm256_set1_epi32(n); @@ -402,7 +401,6 @@ static float calcOrientationHist( const Mat& img, Point pt, int radius, i = 0; #if CV_AVX2 - if( USE_AVX2 ) { __m256 __d_1_16 = _mm256_set1_ps(1.f/16.f); __m256 __d_4_16 = _mm256_set1_ps(4.f/16.f); @@ -784,7 +782,6 @@ static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float sc k = 0; #if CV_AVX2 - if( USE_AVX2 ) { int CV_DECL_ALIGNED(32) idx_buf[8]; float CV_DECL_ALIGNED(32) rco_buf[64]; @@ -928,7 +925,6 @@ static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float sc len = d*d*n; k = 0; #if CV_AVX2 - if( USE_AVX2 ) { float CV_DECL_ALIGNED(32) nrm2_buf[8]; __m256 __nrm2 = _mm256_setzero_ps(); @@ -956,7 +952,6 @@ static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float sc #if 0 //CV_AVX2 // This code cannot be enabled because it sums nrm2 in a different order, // thus producing slightly different results - if( USE_AVX2 ) { float CV_DECL_ALIGNED(32) nrm2_buf[8]; __m256 __dst; @@ -989,7 +984,6 @@ static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float sc #if 1 k = 0; #if CV_AVX2 - if( USE_AVX2 ) { __m256 __dst; __m256 __min = _mm256_setzero_ps(); diff --git a/modules/features2d/test/test_descriptors_invariance.cpp b/modules/features2d/test/test_descriptors_invariance.cpp index f9c65067c5..0bbcad43c5 100644 --- a/modules/features2d/test/test_descriptors_invariance.cpp +++ b/modules/features2d/test/test_descriptors_invariance.cpp @@ -167,6 +167,9 @@ TEST_P(DescriptorScaleInvariance, scale) * Descriptors's rotation invariance check */ +INSTANTIATE_TEST_CASE_P(SIFT, DescriptorRotationInvariance, + Value(IMAGE_TSUKUBA, SIFT::create(), SIFT::create(), 0.98f)); + INSTANTIATE_TEST_CASE_P(BRISK, DescriptorRotationInvariance, Value(IMAGE_TSUKUBA, BRISK::create(), BRISK::create(), 0.99f)); @@ -183,6 +186,10 @@ INSTANTIATE_TEST_CASE_P(AKAZE_DESCRIPTOR_KAZE, DescriptorRotationInvariance, * Descriptor's scale invariance check */ +// TODO: Expected: (descInliersRatio) >= (minInliersRatio), actual: 0.330378 vs 0.78 +INSTANTIATE_TEST_CASE_P(DISABLED_SIFT, DescriptorScaleInvariance, + Value(IMAGE_BIKES, SIFT::create(), SIFT::create(), 0.78f)); + INSTANTIATE_TEST_CASE_P(AKAZE, DescriptorScaleInvariance, Value(IMAGE_BIKES, AKAZE::create(), AKAZE::create(), 0.6f)); diff --git a/modules/features2d/test/test_descriptors_regression.cpp b/modules/features2d/test/test_descriptors_regression.cpp index c63b9a56ce..f8760d6478 100644 --- a/modules/features2d/test/test_descriptors_regression.cpp +++ b/modules/features2d/test/test_descriptors_regression.cpp @@ -342,6 +342,13 @@ private: * Tests registrations * \****************************************************************************************/ +TEST( Features2d_DescriptorExtractor_SIFT, regression ) +{ + CV_DescriptorExtractorTest > test( "descriptor-sift", 1.0f, + SIFT::create() ); + test.safe_run(); +} + TEST( Features2d_DescriptorExtractor_BRISK, regression ) { CV_DescriptorExtractorTest test( "descriptor-brisk", @@ -388,7 +395,7 @@ TEST( Features2d_DescriptorExtractor_AKAZE_DESCRIPTOR_KAZE, regression ) test.safe_run(); } -TEST( Features2d_DescriptorExtractor, batch ) +TEST( Features2d_DescriptorExtractor, batch_ORB ) { string path = string(cvtest::TS::ptr()->get_data_path() + "detectors_descriptors_evaluation/images_datasets/graf"); vector imgs, descriptors; @@ -416,6 +423,35 @@ TEST( Features2d_DescriptorExtractor, batch ) } } +TEST( Features2d_DescriptorExtractor, batch_SIFT ) +{ + string path = string(cvtest::TS::ptr()->get_data_path() + "detectors_descriptors_evaluation/images_datasets/graf"); + vector imgs, descriptors; + vector > keypoints; + int i, n = 6; + Ptr sift = SIFT::create(); + + for( i = 0; i < n; i++ ) + { + string imgname = format("%s/img%d.png", path.c_str(), i+1); + Mat img = imread(imgname, 0); + imgs.push_back(img); + } + + sift->detect(imgs, keypoints); + sift->compute(imgs, keypoints, descriptors); + + ASSERT_EQ((int)keypoints.size(), n); + ASSERT_EQ((int)descriptors.size(), n); + + for( i = 0; i < n; i++ ) + { + EXPECT_GT((int)keypoints[i].size(), 100); + EXPECT_GT(descriptors[i].rows, 100); + } +} + + class DescriptorImage : public TestWithParam { protected: diff --git a/modules/features2d/test/test_detectors_invariance.cpp b/modules/features2d/test/test_detectors_invariance.cpp index c4c6874509..31ba12e60e 100644 --- a/modules/features2d/test/test_detectors_invariance.cpp +++ b/modules/features2d/test/test_detectors_invariance.cpp @@ -220,6 +220,9 @@ TEST_P(DetectorScaleInvariance, scale) * Detector's rotation invariance check */ +INSTANTIATE_TEST_CASE_P(SIFT, DetectorRotationInvariance, + Value(IMAGE_TSUKUBA, SIFT::create(), 0.45f, 0.70f)); + INSTANTIATE_TEST_CASE_P(BRISK, DetectorRotationInvariance, Value(IMAGE_TSUKUBA, BRISK::create(), 0.45f, 0.76f)); @@ -236,6 +239,10 @@ INSTANTIATE_TEST_CASE_P(AKAZE_DESCRIPTOR_KAZE, DetectorRotationInvariance, * Detector's scale invariance check */ +// TODO: Expected: (keyPointMatchesRatio) >= (minKeyPointMatchesRatio), actual: 0.596752 vs 0.69 +INSTANTIATE_TEST_CASE_P(DISABLED_SIFT, DetectorScaleInvariance, + Value(IMAGE_BIKES, SIFT::create(), 0.69f, 0.98f)); + INSTANTIATE_TEST_CASE_P(BRISK, DetectorScaleInvariance, Value(IMAGE_BIKES, BRISK::create(), 0.08f, 0.49f)); diff --git a/modules/features2d/test/test_detectors_regression.cpp b/modules/features2d/test/test_detectors_regression.cpp index 733841287a..5bdede7508 100644 --- a/modules/features2d/test/test_detectors_regression.cpp +++ b/modules/features2d/test/test_detectors_regression.cpp @@ -245,6 +245,12 @@ void CV_FeatureDetectorTest::run( int /*start_from*/ ) * Tests registrations * \****************************************************************************************/ +TEST( Features2d_Detector_SIFT, regression ) +{ + CV_FeatureDetectorTest test( "detector-sift", SIFT::create() ); + test.safe_run(); +} + TEST( Features2d_Detector_BRISK, regression ) { CV_FeatureDetectorTest test( "detector-brisk", BRISK::create() ); diff --git a/modules/features2d/test/test_keypoints.cpp b/modules/features2d/test/test_keypoints.cpp index b09d7ebf60..c169c97163 100644 --- a/modules/features2d/test/test_keypoints.cpp +++ b/modules/features2d/test/test_keypoints.cpp @@ -177,4 +177,11 @@ TEST(Features2d_Detector_Keypoints_AKAZE, validation) test_mldb.safe_run(); } +TEST(Features2d_Detector_Keypoints_SIFT, validation) +{ + CV_FeatureDetectorKeypointsTest test(SIFT::create()); + test.safe_run(); +} + + }} // namespace diff --git a/modules/stitching/src/matchers.cpp b/modules/stitching/src/matchers.cpp index 6105c67631..4c6cce8038 100644 --- a/modules/stitching/src/matchers.cpp +++ b/modules/stitching/src/matchers.cpp @@ -51,7 +51,6 @@ using namespace cv::cuda; #ifdef HAVE_OPENCV_XFEATURES2D #include "opencv2/xfeatures2d.hpp" using xfeatures2d::SURF; -using xfeatures2d::SIFT; #else # if defined(_MSC_VER) # pragma warning(disable:4702) // unreachable code @@ -487,14 +486,8 @@ void SurfFeaturesFinder::find(InputArray image, ImageFeatures &features) SiftFeaturesFinder::SiftFeaturesFinder() { -#ifdef HAVE_OPENCV_XFEATURES2D Ptr sift_ = SIFT::create(); - if( !sift_ ) - CV_Error( Error::StsNotImplemented, "OpenCV was built without SIFT support" ); sift = sift_; -#else - CV_Error( Error::StsNotImplemented, "OpenCV was built without SIFT support" ); -#endif } void SiftFeaturesFinder::find(InputArray image, ImageFeatures &features) diff --git a/samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/src/Utils.cpp b/samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/src/Utils.cpp index 23ea221eb4..6b1d553330 100644 --- a/samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/src/Utils.cpp +++ b/samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/src/Utils.cpp @@ -323,15 +323,8 @@ void createFeatures(const std::string &featureName, int numKeypoints, cv::Ptr