diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index c76705f1b3..a802868df0 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -50,6 +50,13 @@ namespace cv # pragma warning(disable: 4748) #endif +#if defined HAVE_IPP && IPP_VERSION_MAJOR >= 7 +#define USE_IPP_DFT 1 +#else +#undef USE_IPP_DFT +#endif + + /****************************************************************************************\ Discrete Fourier Transform \****************************************************************************************/ @@ -455,7 +462,7 @@ template<> struct DFT_VecR4 #endif -#ifdef HAVE_IPP +#ifdef USE_IPP_DFT static void ippsDFTFwd_CToC( const Complex* src, Complex* dst, const void* spec, uchar* buf) { @@ -517,7 +524,7 @@ DFT( const Complex* src, Complex* dst, int n, int nf, const int* factors, const int* itab, const Complex* wave, int tab_size, const void* -#ifdef HAVE_IPP +#ifdef USE_IPP_DFT spec #endif , Complex* buf, @@ -537,7 +544,7 @@ DFT( const Complex* src, Complex* dst, int n, T scale = (T)_scale; int tab_step; -#ifdef HAVE_IPP +#ifdef USE_IPP_DFT if( spec ) { if( !inv ) @@ -957,7 +964,7 @@ DFT( const Complex* src, Complex* dst, int n, template static void RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, const Complex* wave, int tab_size, const void* -#ifdef HAVE_IPP +#ifdef USE_IPP_DFT spec #endif , @@ -968,11 +975,18 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, int j, n2 = n >> 1; dst += complex_output; -#ifdef HAVE_IPP +#ifdef USE_IPP_DFT if( spec ) { ippsDFTFwd_RToPack( src, dst, spec, (uchar*)buf ); - goto finalize; + if( complex_output ) + { + dst[-1] = dst[0]; + dst[0] = 0; + if( (n & 1) == 0 ) + dst[n] = 0; + } + return; } #endif assert( tab_size == n ); @@ -1056,15 +1070,11 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, } } -#ifdef HAVE_IPP -finalize: -#endif if( complex_output && (n & 1) == 0 ) { dst[-1] = dst[0]; dst[0] = 
0; - if( (n & 1) == 0 ) - dst[n] = 0; + dst[n] = 0; } } @@ -1076,7 +1086,7 @@ template static void CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, const Complex* wave, int tab_size, const void* -#ifdef HAVE_IPP +#ifdef USE_IPP_DFT spec #endif , Complex* buf, @@ -1097,7 +1107,7 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, ((T*)src)[1] = src[0]; src++; } -#ifdef HAVE_IPP +#ifdef USE_IPP_DFT if( spec ) { ippsDFTInv_PackToR( src, dst, spec, (uchar*)buf ); @@ -1225,7 +1235,7 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, } } -#ifdef HAVE_IPP +#ifdef USE_IPP_DFT finalize: #endif if( complex_input ) @@ -1458,7 +1468,7 @@ static void CCSIDFT_64f( const double* src, double* dst, int n, int nf, int* fac } -#ifdef HAVE_IPP +#ifdef USE_IPP_DFT typedef IppStatus (CV_STDCALL* IppDFTGetSizeFunc)(int, int, IppHintAlgorithm, int*, int*, int*); typedef IppStatus (CV_STDCALL* IppDFTInitFunc)(int, int, IppHintAlgorithm, void*, uchar*); #endif @@ -1486,7 +1496,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) int elem_size = (int)src.elemSize1(), complex_elem_size = elem_size*2; int factors[34]; bool inplace_transform = false; -#ifdef HAVE_IPP +#ifdef USE_IPP_DFT AutoBuffer ippbuf; int ipp_norm_flag = !(flags & DFT_SCALE) ? 8 : inv ? 
2 : 1; #endif @@ -1546,12 +1556,8 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) } spec = 0; -#ifdef HAVE_IPP - if( -#if IPP_VERSION_MAJOR >= 7 - depth == CV_32F && // IPP 7.x and 8.0 have bug somewhere in double-precision DFT -#endif - len*count >= 64 ) // use IPP DFT if available +#ifdef USE_IPP_DFT + if( len*count >= 64 ) // use IPP DFT if available { int specsize=0, initsize=0, worksize=0; IppDFTGetSizeFunc getSizeFunc = 0; diff --git a/modules/imgproc/src/canny.cpp b/modules/imgproc/src/canny.cpp index 4dae01343d..fb6afaf2b3 100644 --- a/modules/imgproc/src/canny.cpp +++ b/modules/imgproc/src/canny.cpp @@ -41,6 +41,50 @@ #include "precomp.hpp" +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) +#define USE_IPP_CANNY 1 +#else +#undef USE_IPP_CANNY +#endif + +#ifdef USE_IPP_CANNY +namespace cv +{ +/* IPP fast path for Canny: 3x3 Sobel derivatives (replicated border) into 16-bit _dx/_dy, then ippiCanny into _dst. Only the 8u/16s single-channel (_C1R) IPP flavors are used, so the caller must pass 8-bit single-channel data. Returns false as soon as a filter or Canny call reports a negative IppStatus, true otherwise. */ static bool ippCanny(const Mat& _src, Mat& _dst, float low, float high) +{ + int size = 0, size1 = 0; + IppiSize roi = { _src.cols, _src.rows }; + + ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size); + ippiFilterSobelHorizGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size1); + size = std::max(size, size1); + ippiCannyGetSize(roi, &size1); + size = std::max(size, size1); + + AutoBuffer buf(size + 64); + uchar* buffer = alignPtr((uchar*)buf, 32); + + Mat _dx(_src.rows, _src.cols, CV_16S); + if( ippiFilterSobelNegVertBorder_8u16s_C1R(_src.data, (int)_src.step, + _dx.ptr(), (int)_dx.step, roi, + ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 ) + return false; + + Mat _dy(_src.rows, _src.cols, CV_16S); + if( ippiFilterSobelHorizBorder_8u16s_C1R(_src.data, (int)_src.step, + _dy.ptr(), (int)_dy.step, roi, + ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 ) + return false; + + if( ippiCanny_16s8u_C1R(_dx.ptr(), (int)_dx.step, + _dy.ptr(), (int)_dy.step, + _dst.data, (int)_dst.step, roi, low, high, buffer) < 0 ) + return false; + return true; +} +} +#endif + void cv::Canny( InputArray _src, OutputArray _dst, double
low_thresh, double high_thresh, int aperture_size, bool L2gradient ) @@ -61,20 +105,26 @@ void cv::Canny( InputArray _src, OutputArray _dst, if ((aperture_size & 1) == 0 || (aperture_size != -1 && (aperture_size < 3 || aperture_size > 7))) CV_Error(CV_StsBadFlag, ""); + if (low_thresh > high_thresh) + std::swap(low_thresh, high_thresh); + #ifdef HAVE_TEGRA_OPTIMIZATION if (tegra::canny(src, dst, low_thresh, high_thresh, aperture_size, L2gradient)) return; #endif - const int cn = src.channels(); - cv::Mat dx(src.rows, src.cols, CV_16SC(cn)); - cv::Mat dy(src.rows, src.cols, CV_16SC(cn)); +#ifdef USE_IPP_CANNY + if( aperture_size == 3 && !L2gradient && src.type() == CV_8UC1 && /* the IPP path handles 8-bit single-channel input only */ + ippCanny(src, dst, (float)low_thresh, (float)high_thresh) ) /* ippCanny returns bool: on false fall through to the generic implementation */ + return; +#endif - cv::Sobel(src, dx, CV_16S, 1, 0, aperture_size, 1, 0, cv::BORDER_REPLICATE); - cv::Sobel(src, dy, CV_16S, 0, 1, aperture_size, 1, 0, cv::BORDER_REPLICATE); + const int cn = src.channels(); + Mat dx(src.rows, src.cols, CV_16SC(cn)); + Mat dy(src.rows, src.cols, CV_16SC(cn)); - if (low_thresh > high_thresh) - std::swap(low_thresh, high_thresh); + Sobel(src, dx, CV_16S, 1, 0, aperture_size, 1, 0, cv::BORDER_REPLICATE); + Sobel(src, dy, CV_16S, 0, 1, aperture_size, 1, 0, cv::BORDER_REPLICATE); if (L2gradient) { @@ -88,7 +138,7 @@ void cv::Canny( InputArray _src, OutputArray _dst, int high = cvFloor(high_thresh); ptrdiff_t mapstep = src.cols + 2; - cv::AutoBuffer buffer((src.cols+2)*(src.rows+2) + cn * mapstep * 3 * sizeof(int)); + AutoBuffer buffer((src.cols+2)*(src.rows+2) + cn * mapstep * 3 * sizeof(int)); int* mag_buf[3]; mag_buf[0] = (int*)(uchar*)buffer; diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp index 55bb0c0649..b8bb7cf381 100644 --- a/modules/imgproc/src/morph.cpp +++ b/modules/imgproc/src/morph.cpp @@ -1137,7 +1137,8 @@ private: }; #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) -static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kernel, const Point &anchor) +static bool
IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kernel, + const Size& ksize, const Point &anchor, bool rectKernel) { int type = src.type(); const Mat* _src = &src; @@ -1149,55 +1150,65 @@ static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kerne } //DEPRECATED. Allocates and initializes morphology state structure for erosion or dilation operation. typedef IppStatus (CV_STDCALL* ippiMorphologyInitAllocFunc)(int, const void*, IppiSize, IppiPoint, IppiMorphState **); - ippiMorphologyInitAllocFunc ippInitAllocFunc = - type == CV_8UC1 ? (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_8u_C1R : - type == CV_8UC3 ? (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_8u_C3R : - type == CV_8UC4 ? (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_8u_C4R : - type == CV_32FC1 ? (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_32f_C1R : - type == CV_32FC3 ? (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_32f_C3R : - type == CV_32FC4 ? (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_32f_C4R : - 0; - typedef IppStatus (CV_STDCALL* ippiMorphologyBorderReplicateFunc)(const void*, int, void *, int, IppiSize, IppiBorderType, IppiMorphState *); - ippiMorphologyBorderReplicateFunc ippFunc = 0; - switch( op ) + typedef IppStatus (CV_STDCALL* ippiMorphologyBorderReplicateFunc)(const void*, int, void *, int, + IppiSize, IppiBorderType, IppiMorphState *); + typedef IppStatus (CV_STDCALL* ippiFilterMinMaxGetBufferSizeFunc)(int, IppiSize, int*); + typedef IppStatus (CV_STDCALL* ippiFilterMinMaxBorderReplicateFunc)(const void*, int, void*, int, + IppiSize, IppiSize, IppiPoint, void*); + + ippiMorphologyInitAllocFunc initAllocFunc = 0; + ippiMorphologyBorderReplicateFunc morphFunc = 0; + ippiFilterMinMaxGetBufferSizeFunc getBufSizeFunc = 0; + ippiFilterMinMaxBorderReplicateFunc morphRectFunc = 0; + + #define IPP_MORPH_CASE(type, flavor) \ + case type: \ + initAllocFunc = (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_##flavor; \ 
+ morphFunc = op == MORPH_ERODE ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_##flavor : \ + (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_##flavor; \ + getBufSizeFunc = (ippiFilterMinMaxGetBufferSizeFunc)ippiFilterMinGetBufferSize_##flavor; \ + morphRectFunc = op == MORPH_ERODE ? (ippiFilterMinMaxBorderReplicateFunc)ippiFilterMinBorderReplicate_##flavor : \ + (ippiFilterMinMaxBorderReplicateFunc)ippiFilterMaxBorderReplicate_##flavor; \ + break + + switch( type ) { - case MORPH_DILATE: - { - ippFunc = - type == CV_8UC1 ? (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_8u_C1R : - type == CV_8UC3 ? (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_8u_C3R : - type == CV_8UC4 ? (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_8u_C4R : - type == CV_32FC1 ? (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_32f_C1R : - type == CV_32FC3 ? (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_32f_C3R : - type == CV_32FC4 ? (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_32f_C4R : - 0; - break; - } - case MORPH_ERODE: - { - ippFunc = - type == CV_8UC1 ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_8u_C1R : - type == CV_8UC3 ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_8u_C3R : - type == CV_8UC4 ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_8u_C4R : - type == CV_32FC1 ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_32f_C1R : - type == CV_32FC3 ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_32f_C3R : - type == CV_32FC4 ? 
(ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_32f_C4R : - 0; - break; - } + IPP_MORPH_CASE(CV_8UC1, 8u_C1R); + IPP_MORPH_CASE(CV_8UC3, 8u_C3R); + IPP_MORPH_CASE(CV_8UC4, 8u_C4R); + IPP_MORPH_CASE(CV_32FC1, 32f_C1R); + IPP_MORPH_CASE(CV_32FC3, 32f_C3R); + IPP_MORPH_CASE(CV_32FC4, 32f_C4R); + default: + return false; } - if( ippFunc && ippInitAllocFunc) + #undef IPP_MORPH_CASE + + IppiSize roiSize = {src.cols, src.rows}; + IppiSize kernelSize = {ksize.width, ksize.height}; + IppiPoint point = {anchor.x, anchor.y}; + + if( !rectKernel && morphFunc && initAllocFunc ) { IppiMorphState* pState; - IppiSize roiSize = {src.cols, src.rows}; - IppiSize kernelSize = {kernel.cols, kernel.rows}; - IppiPoint point = {anchor.x, anchor.y}; - if( ippInitAllocFunc( roiSize.width, kernel.data, kernelSize, point, &pState ) < 0 ) + if( initAllocFunc( roiSize.width, kernel.data, kernelSize, point, &pState ) < 0 ) return false; - bool is_ok = ippFunc( _src->data, _src->step[0], dst.data, dst.step[0], roiSize, ippBorderRepl, pState ) >= 0; + bool is_ok = morphFunc( _src->data, (int)_src->step[0], + dst.data, (int)dst.step[0], + roiSize, ippBorderRepl, pState ) >= 0; ippiMorphologyFree(pState); return is_ok; } + else if( rectKernel && morphRectFunc && getBufSizeFunc ) + { + int bufSize = 0; + if( getBufSizeFunc( src.cols, kernelSize, &bufSize) < 0 ) + return false; + AutoBuffer buf(bufSize + 64); + uchar* buffer = alignPtr((uchar*)buf, 32); + return morphRectFunc(_src->data, (int)_src->step[0], dst.data, (int)dst.step[0], + roiSize, kernelSize, point, buffer) >= 0; + } return false; } @@ -1211,7 +1222,7 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst, !( borderType == cv::BORDER_REPLICATE || (borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue()) ) || !( op == MORPH_DILATE || op == MORPH_ERODE) ) return false; - if( borderType == cv::BORDER_CONSTANT ) + if( borderType == cv::BORDER_CONSTANT && kernel.data ) { int x, y; for( y = 
0; y < kernel.rows; y++ ) @@ -1250,23 +1261,29 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst, return true; } + bool rectKernel = false; if( !kernel.data ) { - kernel = getStructuringElement(MORPH_RECT, Size(1+iterations*2,1+iterations*2)); + ksize = Size(1+iterations*2,1+iterations*2); normanchor = Point(iterations, iterations); + rectKernel = true; iterations = 1; } - else if( iterations > 1 && countNonZero(kernel) == kernel.rows*kernel.cols ) + else if( iterations >= 1 && countNonZero(kernel) == kernel.rows*kernel.cols ) { + ksize = Size(ksize.width + (iterations-1)*(ksize.width-1), + ksize.height + (iterations-1)*(ksize.height-1)); /* all-ones kernel applied N times == one pass with the enlarged rectangle */ normanchor = Point(normanchor.x*iterations, normanchor.y*iterations); - kernel = getStructuringElement(MORPH_RECT, - Size(ksize.width + (iterations-1)*(ksize.width-1), - ksize.height + (iterations-1)*(ksize.height-1)), - normanchor); + kernel = Mat(); + rectKernel = true; iterations = 1; } - return IPPMorphReplicate( op, src, dst, kernel, normanchor ); + // TODO: implement the case of iterations > 1. + if( iterations > 1 ) + return false; + + return IPPMorphReplicate( op, src, dst, kernel, ksize, normanchor, rectKernel ); } #endif @@ -1459,7 +1476,7 @@ static void convertConvKernel( const IplConvKernel* src, cv::Mat& dst, cv::Point int i, size = src->nRows*src->nCols; for( i = 0; i < size; i++ ) - dst.data[i] = (uchar)src->values[i]; + dst.data[i] = (uchar)(src->values[i] != 0); }