Merge pull request #8629 from lupustr3:pvlasov/icv2017u2_update2

8 years ago · 26be2402a3
parent 6ffab80169 11c2ffaf1c
commit 26be2402a3
21 changed files with 1372 additions and 1225 deletions
--- a/3rdparty/ippicv/ippicv.cmake
+++ b/3rdparty/ippicv/ippicv.cmake
@ -21,7 +21,7 @@ function(download_ippicv root_var)
      set(OPENCV_ICV_NAME "ippicv_2017u2_lnx_intel64_20170418.tgz")
      set(OPENCV_ICV_HASH "87cbdeb627415d8e4bc811156289fa3a")
    else()
-      set(OPENCV_ICV_NAME "ippicv_2017u2_lnx_ia32_20170406.tgz")
+      set(OPENCV_ICV_NAME "ippicv_2017u2_lnx_ia32_20170418.tgz")
      set(OPENCV_ICV_HASH "f2cece00d802d4dea86df52ed095257e")
    endif()
  elseif(WIN32 AND NOT ARM)
--- a/modules/core/include/opencv2/core/private.hpp
+++ b/modules/core/include/opencv2/core/private.hpp
@ -217,8 +217,6 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un
 #define IPP_DISABLE_PERF_MAG_SSE42      1 // cv::magnitude optimizations problem
 #define IPP_DISABLE_PERF_BOX16S_SSE42   1 // cv::boxFilter optimizations problem

-#define IPP_DISABLE_BLOCK               0 // legacy switch
-
 #ifdef HAVE_IPP
 #include "ippversion.h"
 #ifndef IPP_VERSION_UPDATE // prior to 7.1
--- a/modules/core/src/convert.cpp
+++ b/modules/core/src/convert.cpp
@ -85,6 +85,66 @@ static MergeFunc getMergeFunc(int depth)
    return mergeTab[depth];
 }

+#ifdef HAVE_IPP
+#ifdef HAVE_IPP_IW
+extern "C" {
+IW_DECL(IppStatus) llwiCopySplit(const void *pSrc, int srcStep, void* const pDstOrig[], int dstStep,
+                                   IppiSize size, int typeSize, int channels);
+}
+#endif
+
+namespace cv {
+// IPP path of cv::split for 3 or 4 channels: src is de-interleaved into mv[0..channels-1] by llwiCopySplit.
+// Returns false when the call is not attempted or IPP reports an error (negative status).
+static bool ipp_split(const Mat& src, Mat* mv, int channels)
+{
+#ifdef HAVE_IPP_IW
+    CV_INSTRUMENT_REGION_IPP()
+
+    if(channels != 3 && channels != 4)
+        return false;
+
+    if(src.dims <= 2)
+    {
+        IppiSize size       = ippiSize(src.size());
+        void    *dstPtrs[4] = {NULL};
+        size_t   dstStep    = mv[0].step;
+        for(int i = 0; i < channels; i++)
+        {
+            dstPtrs[i] = mv[i].ptr();
+            if(dstStep != mv[i].step) // llwiCopySplit receives a single dstStep for all planes
+                return false;
+        }
+
+        return CV_INSTRUMENT_FUN_IPP(llwiCopySplit, src.ptr(), (int)src.step, dstPtrs, (int)dstStep, size, (int)src.elemSize1(), channels) >= 0;
+    }
+    else
+    {
+        // slots: src, up to 4 planes, and the NULL entry that terminates the list for NAryMatIterator
+        const Mat *arrays[6] = {NULL};
+        uchar     *ptrs[5]   = {NULL};
+        arrays[0] = &src;
+        for(int i = 1; i <= channels; i++) // inclusive bound: mv[channels-1] must be registered as well
+            arrays[i] = &mv[i-1];
+
+        NAryMatIterator it(arrays, ptrs);
+        IppiSize size = { (int)it.size, 1 }; // every iterator plane is passed as one row with zero steps
+
+        for( size_t i = 0; i < it.nplanes; i++, ++it )
+        {
+            if(CV_INSTRUMENT_FUN_IPP(llwiCopySplit, ptrs[0], 0, (void**)&ptrs[1], 0, size, (int)src.elemSize1(), channels) < 0)
+                return false;
+        }
+        return true;
+    }
+#else
+    CV_UNUSED(src); CV_UNUSED(mv); CV_UNUSED(channels);
+    return false;
+#endif
+}
+}
+#endif
+
 void cv::split(const Mat& src, Mat* mv)
 {
    CV_INSTRUMENT_REGION()
@ -96,6 +156,13 @@ void cv::split(const Mat& src, Mat* mv)
        return;
    }

+    for( k = 0; k < cn; k++ )
+    {
+        mv[k].create(src.dims, src.size, depth);
+    }
+
+    CV_IPP_RUN_FAST(ipp_split(src, mv, cn));
+
    SplitFunc func = getSplitFunc(depth);
    CV_Assert( func != 0 );

@ -108,7 +175,6 @@ void cv::split(const Mat& src, Mat* mv)
    arrays[0] = &src;
    for( k = 0; k < cn; k++ )
    {
-        mv[k].create(src.dims, src.size, depth);
        arrays[k+1] = &mv[k];
    }

@ -206,6 +272,66 @@ void cv::split(InputArray _m, OutputArrayOfArrays _mv)
    split(m, &dst[0]);
 }

+#ifdef HAVE_IPP
+#ifdef HAVE_IPP_IW
+extern "C" {
+IW_DECL(IppStatus) llwiCopyMerge(const void* const pSrc[], int srcStep, void *pDst, int dstStep,
+    IppiSize size, int typeSize, int channels);
+}
+#endif
+
+namespace cv {
+// IPP path of cv::merge: combines the planes mv[0..channels-1] (3 or 4 of them) into dst via llwiCopyMerge.
+// Returns false when the call is not attempted or IPP reports an error (negative status).
+static bool ipp_merge(const Mat* mv, Mat& dst, int channels)
+{
+#ifdef HAVE_IPP_IW
+    CV_INSTRUMENT_REGION_IPP()
+
+    if(channels != 3 && channels != 4)
+        return false;
+    for(int i = 0; i < channels; i++) // cv::merge tests allch1 only after the IPP attempt, so check here
+        if(mv[i].channels() != 1)
+            return false;
+
+    if(mv[0].dims <= 2)
+    {
+        IppiSize    size       = ippiSize(mv[0].size());
+        const void *srcPtrs[4] = {NULL};
+        size_t      srcStep    = mv[0].step;
+        for(int i = 0; i < channels; i++)
+        {
+            srcPtrs[i] = mv[i].ptr();
+            if(srcStep != mv[i].step) // llwiCopyMerge receives a single srcStep for all planes
+                return false;
+        }
+        return CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, srcPtrs, (int)srcStep, dst.ptr(), (int)dst.step, size, (int)mv[0].elemSize1(), channels) >= 0;
+    }
+    else
+    {
+        const Mat *arrays[6] = {NULL}; // dst, up to 4 planes, and the NULL entry that terminates the list
+        uchar     *ptrs[5]   = {NULL};
+        arrays[0] = &dst;
+        for(int i = 1; i <= channels; i++) // inclusive bound: mv[channels-1] must be registered as well
+            arrays[i] = &mv[i-1];
+
+        NAryMatIterator it(arrays, ptrs);
+        IppiSize size = { (int)it.size, 1 }; // every iterator plane is passed as one row with zero steps
+        for( size_t i = 0; i < it.nplanes; i++, ++it )
+        {
+            if(CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, (const void**)&ptrs[1], 0, ptrs[0], 0, size, (int)mv[0].elemSize1(), channels) < 0)
+                return false;
+        }
+        return true;
+    }
+#else
+    CV_UNUSED(dst); CV_UNUSED(mv); CV_UNUSED(channels);
+    return false;
+#endif
+}
+}
+#endif
+
 void cv::merge(const Mat* mv, size_t n, OutputArray _dst)
 {
    CV_INSTRUMENT_REGION()
@ -234,6 +360,8 @@ void cv::merge(const Mat* mv, size_t n, OutputArray _dst)
        return;
    }

+    CV_IPP_RUN_FAST(ipp_merge(mv, dst, (int)n));
+
    if( !allch1 )
    {
        AutoBuffer<int> pairs(cn*2);
@ -691,6 +819,59 @@ void cv::mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst,
    mixChannels(&buf[0], nsrc, &buf[nsrc], ndst, &fromTo[0], fromTo.size()/2);
 }

+#ifdef HAVE_IPP
+#ifdef HAVE_IPP_IW
+extern "C" {
+IW_DECL(IppStatus) llwiCopyMixed(const void *pSrc, int srcStep, int srcChannels, void *pDst, int dstStep, int dstChannels,
+    IppiSize size, int typeSize, int channelsShift);
+}
+#endif
+
+namespace cv
+{
+// IPP path shared by cv::extractChannel and cv::insertChannel: copies through llwiCopyMixed with `channel`
+// as the channel shift. Returns false when the call is not attempted or IPP reports a negative status.
+static bool ipp_extractInsertChannel(const Mat &src, Mat &dst, int channel)
+{
+#ifdef HAVE_IPP_IW
+    CV_INSTRUMENT_REGION_IPP()
+
+    int srcChannels = src.channels();
+    int dstChannels = dst.channels();
+
+    if(src.dims != dst.dims)
+        return false;
+
+    if(srcChannels == dstChannels || (srcChannels != 1 && dstChannels != 1)) // exactly one side must be 1-channel
+        return false;
+
+    if(src.dims <= 2)
+    {
+        IppiSize size = ippiSize(src.size());
+        return CV_INSTRUMENT_FUN_IPP(llwiCopyMixed, src.ptr(), (int)src.step, srcChannels, dst.ptr(), (int)dst.step, dstChannels, size, (int)src.elemSize1(), channel) >= 0;
+    }
+    else
+    {
+        const Mat      *arrays[] = {&src, &dst, NULL}; // both matrices, so ptrs[0] is the src plane and ptrs[1] the dst plane
+        uchar          *ptrs[2]  = {NULL};
+        NAryMatIterator it(arrays, ptrs);
+        IppiSize size = {(int)it.size, 1}; // every iterator plane is passed as one row with zero steps
+
+        for( size_t i = 0; i < it.nplanes; i++, ++it )
+        {
+            if(CV_INSTRUMENT_FUN_IPP(llwiCopyMixed, ptrs[0], 0, srcChannels, ptrs[1], 0, dstChannels, size, (int)src.elemSize1(), channel) < 0)
+                return false;
+        }
+        return true;
+    }
+#else
+    CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(channel);
+    return false;
+#endif
+}
+}
+#endif
+
 void cv::extractChannel(InputArray _src, OutputArray _dst, int coi)
 {
    CV_INSTRUMENT_REGION()
@ -711,6 +892,9 @@ void cv::extractChannel(InputArray _src, OutputArray _dst, int coi)
    Mat src = _src.getMat();
    _dst.create(src.dims, &src.size[0], depth);
    Mat dst = _dst.getMat();
+
+    CV_IPP_RUN_FAST(ipp_extractInsertChannel(src, dst, coi))
+
    mixChannels(&src, 1, &dst, 1, ch, 1);
 }

@ -732,6 +916,9 @@ void cv::insertChannel(InputArray _src, InputOutputArray _dst, int coi)
    }

    Mat src = _src.getMat(), dst = _dst.getMat();
+
+    CV_IPP_RUN_FAST(ipp_extractInsertChannel(src, dst, coi))
+
    mixChannels(&src, 1, &dst, 1, ch, 1);
 }

@ -5264,6 +5451,72 @@ void cv::convertFp16( InputArray _src, OutputArray _dst)
    }
 }

+#ifdef HAVE_IPP
+namespace cv
+{
+// IPP path of cv::Mat::convertTo: forwards src, dst, alpha and beta to ::ipp::iwiScale.
+// Returns false for a src with dims == 0 or when the IPP IW wrapper throws ::ipp::IwException.
+static bool ipp_convertTo(Mat &src, Mat &dst, double alpha, double beta)
+{
+#ifdef HAVE_IPP_IW
+    CV_INSTRUMENT_REGION_IPP()
+
+    IppDataType srcDepth = ippiGetDataType(src.depth());
+    IppDataType dstDepth = ippiGetDataType(dst.depth());
+    int         channels = src.channels();
+
+    if(src.dims == 0)
+        return false;
+
+    ::ipp::IwiImage iwSrc;
+    ::ipp::IwiImage iwDst;
+
+    try
+    {
+        IppHintAlgorithm mode = ippAlgHintFast;
+        if(dstDepth == ipp64f ||
+            (dstDepth == ipp32f && (srcDepth == ipp32s || srcDepth == ipp64f)) ||
+            (dstDepth == ipp32s && (srcDepth == ipp32s || srcDepth == ipp64f)))
+            mode = ippAlgHintAccurate;
+
+        if(src.dims <= 2)
+        {
+            Size sz = getContinuousSize(src, dst, channels); // channels go into sz here, so the images are set up as 1-channel
+            iwSrc.Init(ippiSize(sz), srcDepth, 1, NULL, (void*)src.ptr(), src.step);
+            iwDst.Init(ippiSize(sz), dstDepth, 1, NULL, (void*)dst.ptr(), dst.step);
+
+            CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwDst, alpha, beta, mode);
+        }
+        else
+        {
+            const Mat *arrays[] = {&src, &dst, NULL};
+            uchar     *ptrs[2]  = {NULL};
+            NAryMatIterator it(arrays, ptrs);
+
+            iwSrc.Init(ippiSize(it.size, 1), srcDepth, channels);
+            iwDst.Init(ippiSize(it.size, 1), dstDepth, channels);
+
+            for(size_t i = 0; i < it.nplanes; i++, ++it)
+            {
+                iwSrc.m_ptr  = ptrs[0]; // re-point the pre-initialized images at the current plane
+                iwDst.m_ptr  = ptrs[1];
+                CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwDst, alpha, beta, mode);
+            }
+        }
+    }
+    catch (const ::ipp::IwException &) // by const reference: no copy of the exception object
+    {
+        return false;
+    }
+    return true;
+#else
+    CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(alpha); CV_UNUSED(beta);
+    return false;
+#endif
+}
+}
+#endif
+
 void cv::Mat::convertTo(OutputArray _dst, int _type, double alpha, double beta) const
 {
    CV_INSTRUMENT_REGION()
@ -5283,6 +5536,13 @@ void cv::Mat::convertTo(OutputArray _dst, int _type, double alpha, double beta)
    }

    Mat src = *this;
+    if( dims <= 2 )
+        _dst.create( size(), _type );
+    else
+        _dst.create( dims, size, _type );
+    Mat dst = _dst.getMat();
+
+    CV_IPP_RUN_FAST(ipp_convertTo(src, dst, alpha, beta ));

    BinaryFunc func = noScale ? getConvertFunc(sdepth, ddepth) : getConvertScaleFunc(sdepth, ddepth);
    double scale[] = {alpha, beta};
@ -5291,15 +5551,12 @@ void cv::Mat::convertTo(OutputArray _dst, int _type, double alpha, double beta)

    if( dims <= 2 )
    {
-        _dst.create( size(), _type );
-        Mat dst = _dst.getMat();
        Size sz = getContinuousSize(src, dst, cn);
+
        func( src.data, src.step, 0, 0, dst.data, dst.step, sz, scale );
    }
    else
    {
-        _dst.create( dims, size, _type );
-        Mat dst = _dst.getMat();
        const Mat* arrays[] = {&src, &dst, 0};
        uchar* ptrs[2];
        NAryMatIterator it(arrays, ptrs);
@ -5436,9 +5693,9 @@ static bool openvx_LUT(Mat src, Mat dst, Mat _lut)
 #endif

 #if defined(HAVE_IPP)
+#if !IPP_DISABLE_PERF_LUT // there are no performance benefits (PR #2653)
 namespace ipp {

-#if IPP_DISABLE_BLOCK // there are no performance benefits (PR #2653)
 class IppLUTParallelBody_LUTC1 : public ParallelLoopBody
 {
 public:
@ -5447,25 +5704,17 @@ public:
    const Mat& lut_;
    Mat& dst_;

-    typedef IppStatus (*IppFn)(const Ipp8u* pSrc, int srcStep, void* pDst, int dstStep,
-                          IppiSize roiSize, const void* pTable, int nBitSize);
-    IppFn fn;
-
    int width;
+    size_t elemSize1;

    IppLUTParallelBody_LUTC1(const Mat& src, const Mat& lut, Mat& dst, bool* _ok)
        : ok(_ok), src_(src), lut_(lut), dst_(dst)
    {
        width = dst.cols * dst.channels();
+        elemSize1 = CV_ELEM_SIZE1(dst.depth());

-        size_t elemSize1 = CV_ELEM_SIZE1(dst.depth());
-
-        fn =
-                elemSize1 == 1 ? (IppFn)ippiLUTPalette_8u_C1R :
-                elemSize1 == 4 ? (IppFn)ippiLUTPalette_8u32u_C1R :
-                NULL;
-
-        *ok = (fn != NULL);
+        CV_DbgAssert(elemSize1 == 1 || elemSize1 == 4);
+        *ok = true;
    }

    void operator()( const cv::Range& range ) const
@ -5481,19 +5730,22 @@ public:

        IppiSize sz = { width, dst.rows };

-        CV_DbgAssert(fn != NULL);
-        if (fn(src.data, (int)src.step[0], dst.data, (int)dst.step[0], sz, lut_.data, 8) < 0)
+        if (elemSize1 == 1)
+        {
+            if (CV_INSTRUMENT_FUN_IPP(ippiLUTPalette_8u_C1R, (const Ipp8u*)src.data, (int)src.step[0], dst.data, (int)dst.step[0], sz, lut_.data, 8) >= 0)
+                return;
+        }
+        else if (elemSize1 == 4)
        {
-            setIppErrorStatus();
-            *ok = false;
+            if (CV_INSTRUMENT_FUN_IPP(ippiLUTPalette_8u32u_C1R, (const Ipp8u*)src.data, (int)src.step[0], (Ipp32u*)dst.data, (int)dst.step[0], sz, (Ipp32u*)lut_.data, 8) >= 0)
+                return;
        }
-        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
+        *ok = false;
    }
 private:
    IppLUTParallelBody_LUTC1(const IppLUTParallelBody_LUTC1&);
    IppLUTParallelBody_LUTC1& operator=(const IppLUTParallelBody_LUTC1&);
 };
-#endif

 class IppLUTParallelBody_LUTCN : public ParallelLoopBody
 {
@ -5527,21 +5779,13 @@ public:
        {
            IppStatus status = CV_INSTRUMENT_FUN_IPP(ippiCopy_8u_C3P3R, lut.ptr(), (int)lut.step[0], lutTable, (int)lut.step[0], sz256);
            if (status < 0)
-            {
-                setIppErrorStatus();
                return;
-            }
-            CV_IMPL_ADD(CV_IMPL_IPP);
        }
        else if (lutcn == 4)
        {
            IppStatus status = CV_INSTRUMENT_FUN_IPP(ippiCopy_8u_C4P4R, lut.ptr(), (int)lut.step[0], lutTable, (int)lut.step[0], sz256);
            if (status < 0)
-            {
-                setIppErrorStatus();
                return;
-            }
-            CV_IMPL_ADD(CV_IMPL_IPP);
        }

        *ok = true;
@ -5568,25 +5812,14 @@ public:

        if (lutcn == 3)
        {
-            if (CV_INSTRUMENT_FUN_IPP(ippiLUTPalette_8u_C3R,
-                    src.ptr(), (int)src.step[0], dst.ptr(), (int)dst.step[0],
-                    ippiSize(dst.size()), lutTable, 8) >= 0)
-            {
-                CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
+            if (CV_INSTRUMENT_FUN_IPP(ippiLUTPalette_8u_C3R, src.ptr(), (int)src.step[0], dst.ptr(), (int)dst.step[0], ippiSize(dst.size()), lutTable, 8) >= 0)
                return;
-            }
        }
        else if (lutcn == 4)
        {
-            if (CV_INSTRUMENT_FUN_IPP(ippiLUTPalette_8u_C4R,
-                    src.ptr(), (int)src.step[0], dst.ptr(), (int)dst.step[0],
-                    ippiSize(dst.size()), lutTable, 8) >= 0)
-            {
-                CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
+            if (CV_INSTRUMENT_FUN_IPP(ippiLUTPalette_8u_C4R, src.ptr(), (int)src.step[0], dst.ptr(), (int)dst.step[0], ippiSize(dst.size()), lutTable, 8) >= 0)
                return;
-            }
        }
-        setIppErrorStatus();
        *ok = false;
    }
 private:
@ -5608,15 +5841,13 @@ static bool ipp_lut(Mat &src, Mat &lut, Mat &dst)
    Ptr<ParallelLoopBody> body;

    size_t elemSize1 = CV_ELEM_SIZE1(dst.depth());
-#if IPP_DISABLE_BLOCK // there are no performance benefits (PR #2653)
+
    if (lutcn == 1)
    {
        ParallelLoopBody* p = new ipp::IppLUTParallelBody_LUTC1(src, lut, dst, &ok);
        body.reset(p);
    }
-    else
-#endif
-    if ((lutcn == 3 || lutcn == 4) && elemSize1 == 1)
+    else if ((lutcn == 3 || lutcn == 4) && elemSize1 == 1)
    {
        ParallelLoopBody* p = new ipp::IppLUTParallelBody_LUTCN(src, lut, dst, &ok);
        body.reset(p);
@ -5635,6 +5866,8 @@ static bool ipp_lut(Mat &src, Mat &lut, Mat &dst)

    return false;
 }
+
+#endif
 #endif // IPP

 class LUTParallelBody : public ParallelLoopBody
@ -5703,7 +5936,9 @@ void cv::LUT( InputArray _src, InputArray _lut, OutputArray _dst )
    CV_OVX_RUN(true,
               openvx_LUT(src, dst, lut))

+#if !IPP_DISABLE_PERF_LUT
    CV_IPP_RUN(_src.dims() <= 2, ipp_lut(src, lut, dst));
+#endif

    if (_src.dims() <= 2)
    {
--- a/modules/core/src/copy.cpp
+++ b/modules/core/src/copy.cpp
@ -49,6 +49,19 @@
 #include "precomp.hpp"
 #include "opencl_kernels_core.hpp"

+#ifdef HAVE_IPP_IW
+extern "C" {
+IW_DECL(IppStatus) llwiCopyMask(const void *pSrc, int srcStep, void *pDst, int dstStep,
+    IppiSize size, int typeSize, int channels, const Ipp8u *pMask, int maskStep);
+IW_DECL(IppStatus) llwiSet(const double *pValue, void *pDst, int dstStep,
+    IppiSize size, IppDataType dataType, int channels);
+IW_DECL(IppStatus) llwiSetMask(const double *pValue, void *pDst, int dstStep,
+    IppiSize size, IppDataType dataType, int channels, const Ipp8u *pMask, int maskStep);
+IW_DECL(IppStatus) llwiCopyMakeBorder(const void *pSrc, IppSizeL srcStep, void *pDst, IppSizeL dstStep,
+    IppiSizeL size, IppDataType dataType, int channels, IppiBorderSize *pBorderSize, IppiBorderType border, const Ipp64f *pBorderVal);
+}
+#endif
+
 namespace cv
 {

@ -326,6 +339,42 @@ void Mat::copyTo( OutputArray _dst ) const
    }
 }

+#ifdef HAVE_IPP
+// IPP path of the masked Mat::copyTo: copies src into dst under mask via llwiCopyMask.
+// Returns false when the mask is not a single-channel CV_8U matrix or IPP reports a negative status.
+static bool ipp_copyTo(const Mat &src, Mat &dst, const Mat &mask)
+{
+#ifdef HAVE_IPP_IW
+    CV_INSTRUMENT_REGION_IPP()
+
+    if(mask.channels() > 1 || mask.depth() != CV_8U) // either condition alone rules IPP out; mask is passed as a plain Ipp8u pointer
+        return false;
+
+    if (src.dims <= 2)
+    {
+        IppiSize size = ippiSize(src.size());
+        return CV_INSTRUMENT_FUN_IPP(llwiCopyMask, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, size, (int)src.elemSize1(), src.channels(), mask.ptr(), (int)mask.step) >= 0;
+    }
+    else
+    {
+        const Mat      *arrays[] = {&src, &dst, &mask, NULL};
+        uchar          *ptrs[3]  = {NULL};
+        NAryMatIterator it(arrays, ptrs);
+        IppiSize size = ippiSize(it.size, 1); // every iterator plane is passed as one row with zero steps
+        for (size_t i = 0; i < it.nplanes; i++, ++it)
+        {
+            if(CV_INSTRUMENT_FUN_IPP(llwiCopyMask, ptrs[0], 0, ptrs[1], 0, size, (int)src.elemSize1(), src.channels(), ptrs[2], 0) < 0)
+                return false;
+        }
+        return true;
+    }
+#else
+    CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(mask);
+    return false;
+#endif
+}
+#endif
+
 void Mat::copyTo( OutputArray _dst, InputArray _mask ) const
 {
    CV_INSTRUMENT_REGION()
@ -340,9 +389,10 @@ void Mat::copyTo( OutputArray _dst, InputArray _mask ) const
    int cn = channels(), mcn = mask.channels();
    CV_Assert( mask.depth() == CV_8U && (mcn == 1 || mcn == cn) );
    bool colorMask = mcn > 1;
-
-    size_t esz = colorMask ? elemSize1() : elemSize();
-    BinaryFunc copymask = getCopyMaskFunc(esz);
+    if( dims <= 2 )
+    {
+        CV_Assert( size() == mask.size() );
+    }

    uchar* data0 = _dst.getMat().data;
    _dst.create( dims, size, type() );
@ -351,9 +401,13 @@ void Mat::copyTo( OutputArray _dst, InputArray _mask ) const
    if( dst.data != data0 ) // do not leave dst uninitialized
        dst = Scalar(0);

+    CV_IPP_RUN_FAST(ipp_copyTo(*this, dst, mask))
+
+    size_t esz = colorMask ? elemSize1() : elemSize();
+    BinaryFunc copymask = getCopyMaskFunc(esz);
+
    if( dims <= 2 )
    {
-        CV_Assert( size() == mask.size() );
        Size sz = getContinuousSize(*this, dst, mask, mcn);
        copymask(data, step, mask.data, mask.step, dst.data, dst.step, sz, &esz);
        return;
@ -380,36 +434,6 @@ Mat& Mat::operator = (const Scalar& s)

    if( is[0] == 0 && is[1] == 0 && is[2] == 0 && is[3] == 0 )
    {
-#if defined HAVE_IPP && IPP_DISABLE_BLOCK
-        CV_IPP_CHECK()
-        {
-            if (dims <= 2 || isContinuous())
-            {
-                IppiSize roisize = { cols, rows };
-                if (isContinuous())
-                {
-                    roisize.width = (int)total();
-                    roisize.height = 1;
-
-                    if (ippsZero_8u(data, static_cast<int>(roisize.width * elemSize())) >= 0)
-                    {
-                        CV_IMPL_ADD(CV_IMPL_IPP)
-                        return *this;
-                    }
-                    setIppErrorStatus();
-                }
-                roisize.width *= (int)elemSize();
-
-                if (ippiSet_8u_C1R(0, data, (int)step, roisize) >= 0)
-                {
-                    CV_IMPL_ADD(CV_IMPL_IPP)
-                    return *this;
-                }
-                setIppErrorStatus();
-            }
-        }
-#endif
-
        for( size_t i = 0; i < it.nplanes; i++, ++it )
            memset( dptr, 0, elsize );
    }
@ -437,89 +461,55 @@ Mat& Mat::operator = (const Scalar& s)
    return *this;
 }

-#if defined HAVE_IPP
-static bool ipp_Mat_setTo(Mat *src, Mat &value, Mat &mask)
+#ifdef HAVE_IPP
+static bool ipp_Mat_setTo_Mat(Mat &dst, Mat &_val, Mat &mask)
 {
+#ifdef HAVE_IPP_IW
    CV_INSTRUMENT_REGION_IPP()

-    int cn = src->channels(), depth0 = src->depth();
+    if(mask.empty())
+        return false;

-    if (!mask.empty() && (src->dims <= 2 || (src->isContinuous() && mask.isContinuous())) &&
-            (/*depth0 == CV_8U ||*/ depth0 == CV_16U || depth0 == CV_16S || depth0 == CV_32S || depth0 == CV_32F) &&
-            (cn == 1 || cn == 3 || cn == 4))
+    if(mask.depth() != CV_8U || mask.channels() > 1)
+        return false;
+
+    if(dst.channels() > 4)
+        return false;
+
+    if(dst.dims <= 2)
    {
-        uchar _buf[32];
-        void * buf = _buf;
-        convertAndUnrollScalar( value, src->type(), _buf, 1 );
+        IppiSize       size     = ippiSize(dst.size());
+        IppDataType    dataType = ippiGetDataType(dst.depth());
+        ::ipp::IwValue s;
+        convertAndUnrollScalar(_val, CV_MAKETYPE(CV_64F, dst.channels()), (uchar*)((Ipp64f*)s), 1);

-        IppStatus status = (IppStatus)-1;
-        IppiSize roisize = { src->cols, src->rows };
-        int mstep = (int)mask.step[0], dstep = (int)src->step[0];
+        return CV_INSTRUMENT_FUN_IPP(llwiSetMask, s, dst.ptr(), (int)dst.step, size, dataType, dst.channels(), mask.ptr(), (int)mask.step) >= 0;
+    }
+    else
+    {
+        const Mat      *arrays[] = {&dst, mask.empty()?NULL:&mask, NULL};
+        uchar          *ptrs[2]  = {NULL};
+        NAryMatIterator it(arrays, ptrs);

-        if (src->isContinuous() && mask.isContinuous())
-        {
-            roisize.width = (int)src->total();
-            roisize.height = 1;
-        }
+        IppiSize       size     = {(int)it.size, 1};
+        IppDataType    dataType = ippiGetDataType(dst.depth());
+        ::ipp::IwValue s;
+        convertAndUnrollScalar(_val, CV_MAKETYPE(CV_64F, dst.channels()), (uchar*)((Ipp64f*)s), 1);

-        if (cn == 1)
+        for( size_t i = 0; i < it.nplanes; i++, ++it)
        {
-            /*if (depth0 == CV_8U)
-                status = ippiSet_8u_C1MR(*(Ipp8u *)buf, (Ipp8u *)data, dstep, roisize, mask.data, mstep);
-            else*/ if (depth0 == CV_16U)
-                status = CV_INSTRUMENT_FUN_IPP(ippiSet_16u_C1MR, *(Ipp16u *)buf, (Ipp16u *)src->data, dstep, roisize, mask.data, mstep);
-            else if (depth0 == CV_16S)
-                status = CV_INSTRUMENT_FUN_IPP(ippiSet_16s_C1MR, *(Ipp16s *)buf, (Ipp16s *)src->data, dstep, roisize, mask.data, mstep);
-            else if (depth0 == CV_32S)
-                status = CV_INSTRUMENT_FUN_IPP(ippiSet_32s_C1MR, *(Ipp32s *)buf, (Ipp32s *)src->data, dstep, roisize, mask.data, mstep);
-            else if (depth0 == CV_32F)
-                status = CV_INSTRUMENT_FUN_IPP(ippiSet_32f_C1MR, *(Ipp32f *)buf, (Ipp32f *)src->data, dstep, roisize, mask.data, mstep);
+            if(CV_INSTRUMENT_FUN_IPP(llwiSetMask, s, ptrs[0], 0, size, dataType, dst.channels(), ptrs[1], 0) < 0)
+                return false;
        }
-        else if (cn == 3 || cn == 4)
-        {
-
-#define IPP_SET(ippfavor, ippcn) \
-            do \
-            { \
-                typedef Ipp##ippfavor ipptype; \
-                ipptype ippvalue[4] = { ((ipptype *)buf)[0], ((ipptype *)buf)[1], ((ipptype *)buf)[2], ((ipptype *)buf)[3] }; \
-                status = CV_INSTRUMENT_FUN_IPP(ippiSet_##ippfavor##_C##ippcn##MR, ippvalue, (ipptype *)src->data, dstep, roisize, mask.data, mstep); \
-            } while ((void)0, 0)
-
-#define IPP_SET_CN(ippcn) \
-            do \
-            { \
-                if (cn == ippcn) \
-                { \
-                    /*if (depth0 == CV_8U) \
-                        IPP_SET(8u, ippcn); \
-                    else*/ if (depth0 == CV_16U) \
-                        IPP_SET(16u, ippcn); \
-                    else if (depth0 == CV_16S) \
-                        IPP_SET(16s, ippcn); \
-                    else if (depth0 == CV_32S) \
-                        IPP_SET(32s, ippcn); \
-                    else if (depth0 == CV_32F) \
-                        IPP_SET(32f, ippcn); \
-                } \
-            } while ((void)0, 0)
-
-            IPP_SET_CN(3);
-            IPP_SET_CN(4);
-
-#undef IPP_SET_CN
-#undef IPP_SET
-        }
-
-        if (status >= 0)
-            return true;
+        return true;
    }
-
+#else
+    CV_UNUSED(dst); CV_UNUSED(_val); CV_UNUSED(mask);
    return false;
+#endif
 }
 #endif

-
 Mat& Mat::setTo(InputArray _value, InputArray _mask)
 {
    CV_INSTRUMENT_REGION()
@ -532,7 +522,7 @@ Mat& Mat::setTo(InputArray _value, InputArray _mask)
    CV_Assert( checkScalar(value, type(), _value.kind(), _InputArray::MAT ));
    CV_Assert( mask.empty() || (mask.type() == CV_8U && size == mask.size) );

-    CV_IPP_RUN_FAST(ipp_Mat_setTo((cv::Mat*)this, value, mask), *this)
+    CV_IPP_RUN_FAST(ipp_Mat_setTo_Mat(*this, value, mask), *this)

    size_t esz = elemSize();
    BinaryFunc copymask = getCopyMaskFunc(esz);
@ -707,73 +697,36 @@ static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
 #endif

 #if defined HAVE_IPP
-static bool ipp_flip( Mat &src, Mat &dst, int flip_mode )
+static bool ipp_flip(Mat &src, Mat &dst, int flip_mode)
 {
+#ifdef HAVE_IPP_IW
    CV_INSTRUMENT_REGION_IPP()

-    int type = src.type();
-
-    typedef IppStatus (CV_STDCALL * IppiMirror)(const void * pSrc, int srcStep, void * pDst, int dstStep, IppiSize roiSize, IppiAxis flip);
-    typedef IppStatus (CV_STDCALL * IppiMirrorI)(const void * pSrcDst, int srcDstStep, IppiSize roiSize, IppiAxis flip);
-    IppiMirror ippiMirror = 0;
-    IppiMirrorI ippiMirror_I = 0;
-
-    if (src.data == dst.data)
-    {
-        CV_SUPPRESS_DEPRECATED_START
-        ippiMirror_I =
-            type == CV_8UC1 ? (IppiMirrorI)ippiMirror_8u_C1IR :
-            type == CV_8UC3 ? (IppiMirrorI)ippiMirror_8u_C3IR :
-            type == CV_8UC4 ? (IppiMirrorI)ippiMirror_8u_C4IR :
-            type == CV_16UC1 ? (IppiMirrorI)ippiMirror_16u_C1IR :
-            type == CV_16UC3 ? (IppiMirrorI)ippiMirror_16u_C3IR :
-            type == CV_16UC4 ? (IppiMirrorI)ippiMirror_16u_C4IR :
-            type == CV_16SC1 ? (IppiMirrorI)ippiMirror_16s_C1IR :
-            type == CV_16SC3 ? (IppiMirrorI)ippiMirror_16s_C3IR :
-            type == CV_16SC4 ? (IppiMirrorI)ippiMirror_16s_C4IR :
-            type == CV_32SC1 ? (IppiMirrorI)ippiMirror_32s_C1IR :
-            type == CV_32SC3 ? (IppiMirrorI)ippiMirror_32s_C3IR :
-            type == CV_32SC4 ? (IppiMirrorI)ippiMirror_32s_C4IR :
-            type == CV_32FC1 ? (IppiMirrorI)ippiMirror_32f_C1IR :
-            type == CV_32FC3 ? (IppiMirrorI)ippiMirror_32f_C3IR :
-            type == CV_32FC4 ? (IppiMirrorI)ippiMirror_32f_C4IR : 0;
-        CV_SUPPRESS_DEPRECATED_END
-    }
+    IppiAxis ippMode;
+    if(flip_mode < 0)
+        ippMode = ippAxsBoth;
+    else if(flip_mode == 0)
+        ippMode = ippAxsHorizontal;
    else
-    {
-        ippiMirror =
-            type == CV_8UC1 ? (IppiMirror)ippiMirror_8u_C1R :
-            type == CV_8UC3 ? (IppiMirror)ippiMirror_8u_C3R :
-            type == CV_8UC4 ? (IppiMirror)ippiMirror_8u_C4R :
-            type == CV_16UC1 ? (IppiMirror)ippiMirror_16u_C1R :
-            type == CV_16UC3 ? (IppiMirror)ippiMirror_16u_C3R :
-            type == CV_16UC4 ? (IppiMirror)ippiMirror_16u_C4R :
-            type == CV_16SC1 ? (IppiMirror)ippiMirror_16s_C1R :
-            type == CV_16SC3 ? (IppiMirror)ippiMirror_16s_C3R :
-            type == CV_16SC4 ? (IppiMirror)ippiMirror_16s_C4R :
-            type == CV_32SC1 ? (IppiMirror)ippiMirror_32s_C1R :
-            type == CV_32SC3 ? (IppiMirror)ippiMirror_32s_C3R :
-            type == CV_32SC4 ? (IppiMirror)ippiMirror_32s_C4R :
-            type == CV_32FC1 ? (IppiMirror)ippiMirror_32f_C1R :
-            type == CV_32FC3 ? (IppiMirror)ippiMirror_32f_C3R :
-            type == CV_32FC4 ? (IppiMirror)ippiMirror_32f_C4R : 0;
-    }
-    IppiAxis axis = flip_mode == 0 ? ippAxsHorizontal :
-        flip_mode > 0 ? ippAxsVertical : ippAxsBoth;
-    IppiSize roisize = { dst.cols, dst.rows };
+        ippMode = ippAxsVertical;

-    if (ippiMirror != 0)
+    try
    {
-        if (CV_INSTRUMENT_FUN_IPP(ippiMirror, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, ippiSize(src.cols, src.rows), axis) >= 0)
-            return true;
+        ::ipp::IwiImage iwSrc = ippiGetImage(src);
+        ::ipp::IwiImage iwDst = ippiGetImage(dst);
+
+        CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, &iwSrc, &iwDst, ippMode);
    }
-    else if (ippiMirror_I != 0)
+    catch(::ipp::IwException)
    {
-        if (CV_INSTRUMENT_FUN_IPP(ippiMirror_I, dst.ptr(), (int)dst.step, roisize, axis) >= 0)
-            return true;
+        return false;
    }

+    return true;
+#else
+    CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(flip_mode);
    return false;
+#endif
 }
 #endif

@ -1178,7 +1131,41 @@ static bool ocl_copyMakeBorder( InputArray _src, OutputArray _dst, int top, int
 }

 }
+#endif
+
+#ifdef HAVE_IPP
+namespace cv {
+
+// IPP path of cv::copyMakeBorder: calls llwiCopyMakeBorder with _src as the source and, as the
+// destination, the part of _dst that remains once the four margins are cut away.
+// Returns false when the call is not attempted or IPP reports a negative status.
+static bool ipp_copyMakeBorder( Mat &_src, Mat &_dst, int top, int bottom,
+                                int left, int right, int _borderType, const Scalar& value )
+{
+#if defined HAVE_IPP_IW && !IPP_DISABLE_PERF_COPYMAKE
+    CV_INSTRUMENT_REGION_IPP()

+    ::ipp::IwiBorderSize margins(left, top, right, bottom);
+    ::ipp::IwiSize       srcSize(_src.cols, _src.rows);
+    IppDataType          ippDepth  = ippiGetDataType(_src.depth());
+    IppiBorderType       ippBorder = ippiGetBorderType(_borderType);
+    // give up when ippiGetBorderType yields -1 or the source has more than two dimensions
+    if((int)ippBorder == -1 || _src.dims > 2)
+        return false;
+
+    const int roiWidth  = _dst.cols - margins.borderRight - margins.borderLeft;
+    const int roiHeight = _dst.rows - margins.borderBottom - margins.borderTop;
+    Mat inner(_dst, Rect(margins.borderLeft, margins.borderTop, roiWidth, roiHeight));
+
+    return CV_INSTRUMENT_FUN_IPP(llwiCopyMakeBorder, _src.ptr(), _src.step, inner.ptr(), inner.step, srcSize, ippDepth, _src.channels(), &margins, ippBorder, &value[0]) >= 0;
+#else
+    CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(top); CV_UNUSED(bottom); CV_UNUSED(left); CV_UNUSED(right);
+    CV_UNUSED(_borderType); CV_UNUSED(value);
+    return false;
+#endif
+}
+}
 #endif

 void cv::copyMakeBorder( InputArray _src, OutputArray _dst, int top, int bottom,
@ -1222,120 +1209,7 @@ void cv::copyMakeBorder( InputArray _src, OutputArray _dst, int top, int bottom,

    borderType &= ~BORDER_ISOLATED;

-#if defined HAVE_IPP && IPP_DISABLE_BLOCK
-    CV_IPP_CHECK()
-    {
-        typedef IppStatus (CV_STDCALL * ippiCopyMakeBorder)(const void * pSrc, int srcStep, IppiSize srcRoiSize, void * pDst,
-                                                            int dstStep, IppiSize dstRoiSize, int topBorderHeight, int leftBorderWidth);
-        typedef IppStatus (CV_STDCALL * ippiCopyMakeBorderI)(const void * pSrc, int srcDstStep, IppiSize srcRoiSize, IppiSize dstRoiSize,
-                                                             int topBorderHeight, int leftborderwidth);
-        typedef IppStatus (CV_STDCALL * ippiCopyConstBorder)(const void * pSrc, int srcStep, IppiSize srcRoiSize, void * pDst, int dstStep,
-                                                             IppiSize dstRoiSize, int topBorderHeight, int leftBorderWidth, void * value);
-
-        IppiSize srcRoiSize = { src.cols, src.rows }, dstRoiSize = { dst.cols, dst.rows };
-        ippiCopyMakeBorder ippFunc = 0;
-        ippiCopyMakeBorderI ippFuncI = 0;
-        ippiCopyConstBorder ippFuncConst = 0;
-        bool inplace = dst.datastart == src.datastart;
-
-        if (borderType == BORDER_CONSTANT)
-        {
-             ippFuncConst =
-    //             type == CV_8UC1 ? (ippiCopyConstBorder)ippiCopyConstBorder_8u_C1R : bug in IPP 8.1
-                 type == CV_16UC1 ? (ippiCopyConstBorder)ippiCopyConstBorder_16u_C1R :
-    //             type == CV_16SC1 ? (ippiCopyConstBorder)ippiCopyConstBorder_16s_C1R : bug in IPP 8.1
-    //             type == CV_32SC1 ? (ippiCopyConstBorder)ippiCopyConstBorder_32s_C1R : bug in IPP 8.1
-    //             type == CV_32FC1 ? (ippiCopyConstBorder)ippiCopyConstBorder_32f_C1R : bug in IPP 8.1
-                 type == CV_8UC3 ? (ippiCopyConstBorder)ippiCopyConstBorder_8u_C3R :
-                 type == CV_16UC3 ? (ippiCopyConstBorder)ippiCopyConstBorder_16u_C3R :
-                 type == CV_16SC3 ? (ippiCopyConstBorder)ippiCopyConstBorder_16s_C3R :
-                 type == CV_32SC3 ? (ippiCopyConstBorder)ippiCopyConstBorder_32s_C3R :
-                 type == CV_32FC3 ? (ippiCopyConstBorder)ippiCopyConstBorder_32f_C3R :
-                 type == CV_8UC4 ? (ippiCopyConstBorder)ippiCopyConstBorder_8u_C4R :
-                 type == CV_16UC4 ? (ippiCopyConstBorder)ippiCopyConstBorder_16u_C4R :
-                 type == CV_16SC4 ? (ippiCopyConstBorder)ippiCopyConstBorder_16s_C4R :
-                 type == CV_32SC4 ? (ippiCopyConstBorder)ippiCopyConstBorder_32s_C4R :
-                 type == CV_32FC4 ? (ippiCopyConstBorder)ippiCopyConstBorder_32f_C4R : 0;
-        }
-        else if (borderType == BORDER_WRAP)
-        {
-            if (inplace)
-            {
-                CV_SUPPRESS_DEPRECATED_START
-                ippFuncI =
-                    type == CV_32SC1 ? (ippiCopyMakeBorderI)ippiCopyWrapBorder_32s_C1IR :
-                    type == CV_32FC1 ? (ippiCopyMakeBorderI)ippiCopyWrapBorder_32s_C1IR : 0;
-                CV_SUPPRESS_DEPRECATED_END
-            }
-            else
-            {
-                ippFunc =
-                    type == CV_32SC1 ? (ippiCopyMakeBorder)ippiCopyWrapBorder_32s_C1R :
-                    type == CV_32FC1 ? (ippiCopyMakeBorder)ippiCopyWrapBorder_32s_C1R : 0;
-            }
-        }
-        else if (borderType == BORDER_REPLICATE)
-        {
-            if (inplace)
-            {
-                CV_SUPPRESS_DEPRECATED_START
-                ippFuncI =
-                    type == CV_8UC1 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_8u_C1IR :
-                    type == CV_16UC1 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_16u_C1IR :
-                    type == CV_16SC1 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_16s_C1IR :
-                    type == CV_32SC1 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_32s_C1IR :
-                    type == CV_32FC1 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_32f_C1IR :
-                    type == CV_8UC3 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_8u_C3IR :
-                    type == CV_16UC3 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_16u_C3IR :
-                    type == CV_16SC3 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_16s_C3IR :
-                    type == CV_32SC3 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_32s_C3IR :
-                    type == CV_32FC3 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_32f_C3IR :
-                    type == CV_8UC4 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_8u_C4IR :
-                    type == CV_16UC4 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_16u_C4IR :
-                    type == CV_16SC4 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_16s_C4IR :
-                    type == CV_32SC4 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_32s_C4IR :
-                    type == CV_32FC4 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_32f_C4IR : 0;
-                CV_SUPPRESS_DEPRECATED_END
-            }
-            else
-            {
-                 ippFunc =
-                     type == CV_8UC1 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_8u_C1R :
-                     type == CV_16UC1 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_16u_C1R :
-                     type == CV_16SC1 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_16s_C1R :
-                     type == CV_32SC1 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_32s_C1R :
-                     type == CV_32FC1 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_32f_C1R :
-                     type == CV_8UC3 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_8u_C3R :
-                     type == CV_16UC3 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_16u_C3R :
-                     type == CV_16SC3 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_16s_C3R :
-                     type == CV_32SC3 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_32s_C3R :
-                     type == CV_32FC3 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_32f_C3R :
-                     type == CV_8UC4 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_8u_C4R :
-                     type == CV_16UC4 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_16u_C4R :
-                     type == CV_16SC4 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_16s_C4R :
-                     type == CV_32SC4 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_32s_C4R :
-                     type == CV_32FC4 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_32f_C4R : 0;
-            }
-        }
-
-        if (ippFunc || ippFuncI || ippFuncConst)
-        {
-            uchar scbuf[32];
-            scalarToRawData(value, scbuf, type);
-
-            if ( (ippFunc && ippFunc(src.data, (int)src.step, srcRoiSize, dst.data, (int)dst.step, dstRoiSize, top, left) >= 0) ||
-                 (ippFuncI && ippFuncI(src.data, (int)src.step, srcRoiSize, dstRoiSize, top, left) >= 0) ||
-                 (ippFuncConst && ippFuncConst(src.data, (int)src.step, srcRoiSize, dst.data, (int)dst.step,
-                                               dstRoiSize, top, left, scbuf) >= 0))
-            {
-                CV_IMPL_ADD(CV_IMPL_IPP);
-                return;
-            }
-
-            setIppErrorStatus();
-        }
-    }
-#endif
+    CV_IPP_RUN_FAST(ipp_copyMakeBorder(src, dst, top, bottom, left, right, borderType, value))

    if( borderType != BORDER_CONSTANT )
        copyMakeBorder_8u( src.ptr(), src.step, src.size(),
--- a/modules/core/src/mathfuncs.cpp
+++ b/modules/core/src/mathfuncs.cpp
@ -497,6 +497,65 @@ static bool ocl_polarToCart( InputArray _mag, InputArray _angle,

 #endif

+#ifdef HAVE_IPP
+static bool ipp_polarToCart(Mat &mag, Mat &angle, Mat &x, Mat &y) // runs ippsPolarToCart_32f/_64f over mag/angle with x/y passed as destinations; false on unsupported depth or IPP error
+{
+    CV_INSTRUMENT_REGION_IPP()
+
+    int depth = angle.depth();
+    if(depth != CV_32F && depth != CV_64F) // only these two depths are dispatched below
+        return false;
+
+    if(angle.dims <= 2)
+    {
+        int len = (int)(angle.cols*angle.channels()); // scalar count of one row, all channels included
+
+        if(depth == CV_32F)
+        {
+            for (int h = 0; h < angle.rows; h++) // one IPP call per row, addressed through ptr(h)
+            {
+                if(CV_INSTRUMENT_FUN_IPP(ippsPolarToCart_32f, (const float*)mag.ptr(h), (const float*)angle.ptr(h), (float*)x.ptr(h), (float*)y.ptr(h), len) < 0)
+                    return false;
+            }
+        }
+        else
+        {
+            for (int h = 0; h < angle.rows; h++)
+            {
+                if(CV_INSTRUMENT_FUN_IPP(ippsPolarToCart_64f, (const double*)mag.ptr(h), (const double*)angle.ptr(h), (double*)x.ptr(h), (double*)y.ptr(h), len) < 0)
+                    return false;
+            }
+        }
+        return true;
+    }
+    else
+    {
+        const Mat      *arrays[] = {&mag, &angle, &x, &y, NULL};
+        uchar          *ptrs[4]  = {NULL}; // filled by the iterator: [0]=mag, [1]=angle, [2]=x, [3]=y
+        NAryMatIterator it(arrays, ptrs);
+        int len = (int)(it.size*angle.channels()); // scalar count of one iterator plane
+
+        if(depth == CV_32F)
+        {
+            for (size_t i = 0; i < it.nplanes; i++, ++it) // one IPP call per plane
+            {
+                if(CV_INSTRUMENT_FUN_IPP(ippsPolarToCart_32f, (const float*)ptrs[0], (const float*)ptrs[1], (float*)ptrs[2], (float*)ptrs[3], len) < 0)
+                    return false;
+            }
+        }
+        else
+        {
+            for (size_t i = 0; i < it.nplanes; i++, ++it)
+            {
+                if(CV_INSTRUMENT_FUN_IPP(ippsPolarToCart_64f, (const double*)ptrs[0], (const double*)ptrs[1], (double*)ptrs[2], (double*)ptrs[3], len) < 0)
+                    return false;
+            }
+        }
+        return true;
+    }
+}
+#endif
+
 void polarToCart( InputArray src1, InputArray src2,
                  OutputArray dst1, OutputArray dst2, bool angleInDegrees )
 {
@ -514,28 +573,7 @@ void polarToCart( InputArray src1, InputArray src2,
    dst2.create( Angle.dims, Angle.size, type );
    Mat X = dst1.getMat(), Y = dst2.getMat();

-#if defined(HAVE_IPP)
-    CV_IPP_CHECK()
-    {
-        if (Mag.isContinuous() && Angle.isContinuous() && X.isContinuous() && Y.isContinuous() && !angleInDegrees)
-        {
-            typedef IppStatus (CV_STDCALL * IppsPolarToCart)(const void * pSrcMagn, const void * pSrcPhase,
-                                                             void * pDstRe, void * pDstIm, int len);
-            IppsPolarToCart ippsPolarToCart =
-            depth == CV_32F ? (IppsPolarToCart)ippsPolarToCart_32f :
-            depth == CV_64F ? (IppsPolarToCart)ippsPolarToCart_64f : 0;
-            CV_Assert(ippsPolarToCart != 0);
-
-            IppStatus status = CV_INSTRUMENT_FUN_IPP(ippsPolarToCart, Mag.ptr(), Angle.ptr(), X.ptr(), Y.ptr(), static_cast<int>(cn * X.total()));
-            if (status >= 0)
-            {
-                CV_IMPL_ADD(CV_IMPL_IPP);
-                return;
-            }
-            setIppErrorStatus();
-        }
-    }
-#endif
+    CV_IPP_RUN(!angleInDegrees, ipp_polarToCart(Mag, Angle, X, Y));

    const Mat* arrays[] = {&Mag, &Angle, &X, &Y, 0};
    uchar* ptrs[4];
@ -1167,11 +1205,6 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst,

 #endif

-static void InvSqrt_32f(const float* src, float* dst, int n) { hal::invSqrt32f(src, dst, n); }
-static void InvSqrt_64f(const double* src, double* dst, int n) { hal::invSqrt64f(src, dst, n); }
-static void Sqrt_32f(const float* src, float* dst, int n) { hal::sqrt32f(src, dst, n); }
-static void Sqrt_64f(const double* src, double* dst, int n) { hal::sqrt64f(src, dst, n); }
-
 void pow( InputArray _src, double power, OutputArray _dst )
 {
    CV_INSTRUMENT_REGION()
@ -1228,8 +1261,8 @@ void pow( InputArray _src, double power, OutputArray _dst )
    else if( fabs(fabs(power) - 0.5) < DBL_EPSILON )
    {
        MathFunc func = power < 0 ?
-            (depth == CV_32F ? (MathFunc)InvSqrt_32f : (MathFunc)InvSqrt_64f) :
-            (depth == CV_32F ? (MathFunc)Sqrt_32f : (MathFunc)Sqrt_64f);
+            (depth == CV_32F ? (MathFunc)hal::invSqrt32f : (MathFunc)hal::invSqrt64f) :
+            (depth == CV_32F ? (MathFunc)hal::sqrt32f : (MathFunc)hal::sqrt64f);

        for( size_t i = 0; i < it.nplanes; i++, ++it )
            func( ptrs[0], ptrs[1], len );
@ -1261,24 +1294,6 @@ void pow( InputArray _src, double power, OutputArray _dst )
            {
                int bsz = std::min(len - j, blockSize);

-#if defined(HAVE_IPP)
-                CV_IPP_CHECK()
-                {
-                    IppStatus status = depth == CV_32F ?
-                    CV_INSTRUMENT_FUN_IPP(ippsPowx_32f_A21, (const float*)ptrs[0], (float)power, (float*)ptrs[1], bsz) :
-                    CV_INSTRUMENT_FUN_IPP(ippsPowx_64f_A50, (const double*)ptrs[0], (double)power, (double*)ptrs[1], bsz);
-
-                    if (status >= 0)
-                    {
-                        CV_IMPL_ADD(CV_IMPL_IPP);
-                        ptrs[0] += bsz*esz1;
-                        ptrs[1] += bsz*esz1;
-                        continue;
-                    }
-                    setIppErrorStatus();
-                }
-#endif
-
                if( depth == CV_32F )
                {
                    float* x0 = (float*)ptrs[0];
--- a/modules/core/src/mathfuncs_core.dispatch.cpp
+++ b/modules/core/src/mathfuncs_core.dispatch.cpp
@ -44,7 +44,7 @@ void magnitude32f(const float* x, const float* y, float* mag, int len)
    CV_INSTRUMENT_REGION()

    CALL_HAL(magnitude32f, cv_hal_magnitude32f, x, y, mag, len);
-    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0);
+    CV_IPP_RUN(!IPP_DISABLE_PERF_MAG_SSE42 || (ipp::getIppFeatures()&ippCPUID_AVX), CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0);

    CV_CPU_DISPATCH(magnitude32f, (x, y, mag, len),
        CV_CPU_DISPATCH_MODES_ALL);
@ -55,7 +55,7 @@ void magnitude64f(const double* x, const double* y, double* mag, int len)
    CV_INSTRUMENT_REGION()

    CALL_HAL(magnitude64f, cv_hal_magnitude64f, x, y, mag, len);
-    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsMagnitude_64f, x, y, mag, len) >= 0);
+    CV_IPP_RUN(!IPP_DISABLE_PERF_MAG_SSE42 || (ipp::getIppFeatures()&ippCPUID_AVX), CV_INSTRUMENT_FUN_IPP(ippsMagnitude_64f, x, y, mag, len) >= 0);

    CV_CPU_DISPATCH(magnitude64f, (x, y, mag, len),
        CV_CPU_DISPATCH_MODES_ALL);
--- a/modules/core/src/matmul.cpp
+++ b/modules/core/src/matmul.cpp
@ -3100,18 +3100,8 @@ dotProd_(const T* src1, const T* src2, int len)
 static double dotProd_8u(const uchar* src1, const uchar* src2, int len)
 {
    double r = 0;
-#if ARITHM_USE_IPP && IPP_DISABLE_BLOCK
-    CV_IPP_CHECK()
-    {
-        if (0 <= CV_INSTRUMENT_FUN_IPP(ippiDotProd_8u64f_C1R, (src1, (int)(len*sizeof(src1[0])),
-                                       src2, (int)(len*sizeof(src2[0])),
-                                       ippiSize(len, 1), &r)))
-        {
-            CV_IMPL_ADD(CV_IMPL_IPP);
-            return r;
-        }
-        setIppErrorStatus();
-    }
+#if ARITHM_USE_IPP
+    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippiDotProd_8u64f_C1R, src1, len*sizeof(uchar), src2, len*sizeof(uchar), ippiSize(len, 1), &r) >= 0, r);
 #endif
    int i = 0;

@ -3298,51 +3288,27 @@ static double dotProd_8s(const schar* src1, const schar* src2, int len)

 static double dotProd_16u(const ushort* src1, const ushort* src2, int len)
 {
-#if (ARITHM_USE_IPP == 1)
-    CV_IPP_CHECK()
-    {
-        double r = 0;
-        if (0 <= CV_INSTRUMENT_FUN_IPP(ippiDotProd_16u64f_C1R, src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r))
-        {
-            CV_IMPL_ADD(CV_IMPL_IPP);
-            return r;
-        }
-        setIppErrorStatus();
-    }
+#if ARITHM_USE_IPP
+    double r = 0; // output slot for the IPP call below, which treats both inputs as one row of len elements; NOTE(review): CV_IPP_RUN_FAST is defined elsewhere - presumably returns r on success, else falls through to dotProd_
+    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippiDotProd_16u64f_C1R, src1, len*sizeof(ushort), src2, len*sizeof(ushort), ippiSize(len, 1), &r) >= 0, r);
 #endif
     return dotProd_(src1, src2, len);
 }

 static double dotProd_16s(const short* src1, const short* src2, int len)
 {
-#if (ARITHM_USE_IPP == 1) && (IPP_VERSION_X100 != 900) // bug in IPP 9.0.0
-    CV_IPP_CHECK()
-    {
-        double r = 0;
-        if (0 <= CV_INSTRUMENT_FUN_IPP(ippiDotProd_16s64f_C1R, src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r))
-        {
-            CV_IMPL_ADD(CV_IMPL_IPP);
-            return r;
-        }
-        setIppErrorStatus();
-    }
+#if ARITHM_USE_IPP && (IPP_VERSION_X100 != 900) // bug in IPP 9.0.0
+    double r = 0; // output slot for the IPP call below, which treats both inputs as one row of len elements; NOTE(review): CV_IPP_RUN_FAST is defined elsewhere - presumably returns r on success, else falls through to dotProd_
+    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippiDotProd_16s64f_C1R, src1, len*sizeof(short), src2, len*sizeof(short), ippiSize(len, 1), &r) >= 0, r);
 #endif
     return dotProd_(src1, src2, len);
 }

 static double dotProd_32s(const int* src1, const int* src2, int len)
 {
-#if (ARITHM_USE_IPP == 1)
-    CV_IPP_CHECK()
-    {
-        double r = 0;
-        if (0 <= CV_INSTRUMENT_FUN_IPP(ippiDotProd_32s64f_C1R, src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r))
-        {
-            CV_IMPL_ADD(CV_IMPL_IPP);
-            return r;
-        }
-        setIppErrorStatus();
-    }
+#if ARITHM_USE_IPP
+    double r = 0; // output slot for the IPP call below, which treats both inputs as one row of len elements; NOTE(review): CV_IPP_RUN_FAST is defined elsewhere - presumably returns r on success, else falls through to dotProd_
+    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippiDotProd_32s64f_C1R, src1, len*sizeof(int), src2, len*sizeof(int), ippiSize(len, 1), &r) >= 0, r);
 #endif
     return dotProd_(src1, src2, len);
 }
@ -3350,19 +3316,13 @@ static double dotProd_32s(const int* src1, const int* src2, int len)
 static double dotProd_32f(const float* src1, const float* src2, int len)
 {
    double r = 0.0;
+
+#if ARITHM_USE_IPP
+    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippiDotProd_32f64f_C1R, src1, len*sizeof(float), src2, len*sizeof(float), ippiSize(len, 1), &r, ippAlgHintFast) >= 0, r);
+#endif
    int i = 0;

-#if (ARITHM_USE_IPP == 1)
-    CV_IPP_CHECK()
-    {
-        if (0 <= CV_INSTRUMENT_FUN_IPP(ippsDotProd_32f64f, src1, src2, len, &r))
-        {
-            CV_IMPL_ADD(CV_IMPL_IPP);
-            return r;
-        }
-        setIppErrorStatus();
-    }
-#elif CV_NEON
+#if CV_NEON
    int len0 = len & -4, blockSize0 = (1 << 13), blockSize;
    float32x4_t v_zero = vdupq_n_f32(0.0f);
    CV_DECL_ALIGNED(16) float buf[4];
@ -3389,18 +3349,11 @@ static double dotProd_32f(const float* src1, const float* src2, int len)

 static double dotProd_64f(const double* src1, const double* src2, int len)
 {
-#if (ARITHM_USE_IPP == 1)
-    CV_IPP_CHECK()
-    {
-        double r = 0;
-        if (0 <= CV_INSTRUMENT_FUN_IPP(ippsDotProd_64f, src1, src2, len, &r))
-        {
-            CV_IMPL_ADD(CV_IMPL_IPP);
-            return r;
-        }
-        setIppErrorStatus();
-    }
+#if ARITHM_USE_IPP
+    double r = 0; // output slot for ippsDotProd_64f; NOTE(review): CV_IPP_RUN_FAST is defined elsewhere - presumably returns r on success, else falls through to dotProd_
+    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsDotProd_64f, src1, src2, len, &r) >= 0, r);
 #endif
+
     return dotProd_(src1, src2, len);
 }

--- a/modules/core/src/stat.cpp
+++ b/modules/core/src/stat.cpp
@ -1309,30 +1309,51 @@ static bool ipp_countNonZero( Mat &src, int &res )
 {
     CV_INSTRUMENT_REGION_IPP()

-    Ipp32s count = 0;
-    IppStatus status = ippStsNoErr;
+    Ipp32s  count = 0; // receives the number of elements inside [0, 0] from ippiCountInRange, i.e. the zeros
+    int     depth = src.depth();

-    int type = src.type(), depth = CV_MAT_DEPTH(type);
-    IppiSize roiSize = { src.cols, src.rows };
-    Ipp32s srcstep = (Ipp32s)src.step;
-    if (src.isContinuous())
+    if(src.dims <= 2)
     {
-        roiSize.width = (Ipp32s)src.total();
-        roiSize.height = 1;
-        srcstep = (Ipp32s)src.total() * CV_ELEM_SIZE(type);
-    }
+        IppStatus status;
+        IppiSize  size = {src.cols*src.channels(), src.rows}; // channels are flattened into the row width
+
+        if(depth == CV_8U)
+            status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_8u_C1R, (const Ipp8u *)src.ptr(), (int)src.step, size, &count, 0, 0);
+        else if(depth == CV_32F)
+            status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_32f_C1R, (const Ipp32f *)src.ptr(), (int)src.step, size, &count, 0, 0);
+        else
+            return false; // only 8-bit unsigned and 32-bit float depths have an IPP path here

-    if (depth == CV_8U)
-        status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_8u_C1R, (const Ipp8u *)src.data, srcstep, roiSize, &count, 0, 0);
-    else if (depth == CV_32F)
-        status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_32f_C1R, (const Ipp32f *)src.data, srcstep, roiSize, &count, 0, 0);
+        if(status < 0)
+            return false;

-    if (status >= 0)
+        res = size.width*size.height - count; // non-zeros = all elements minus the counted zeros
+    }
+    else
     {
-        res = ((Ipp32s)src.total() - count);
-        return true;
+        IppStatus       status;
+        const Mat      *arrays[] = {&src, NULL};
+        uchar          *ptrs[1]  = {NULL}; // ptrs[0] is advanced by the iterator to the current plane
+        NAryMatIterator it(arrays, ptrs);
+        IppiSize        size    = {(int)it.size*src.channels(), 1}; // every plane is handed to IPP as a single row
+        int             srcStep = (int)(size.width*src.elemSize1()); // byte length of that row; src.step does not describe a flattened plane
+        int             nonZero = 0; // local accumulator so res never depends on the value the caller passed in
+
+        for (size_t i = 0; i < it.nplanes; i++, ++it)
+        {
+            if(depth == CV_8U)
+                status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_8u_C1R, (const Ipp8u *)ptrs[0], srcStep, size, &count, 0, 0);
+            else if(depth == CV_32F)
+                status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_32f_C1R, (const Ipp32f *)ptrs[0], srcStep, size, &count, 0, 0);
+            else
+                return false;
+
+            if(status < 0)
+                return false; // res is left untouched on failure
+
+            nonZero += (size.width*size.height - count);
+        }
+        res = nonZero;
     }
-    return false;
+
+    return true;
 }
 }
 #endif
@ -1356,7 +1377,7 @@ int cv::countNonZero( InputArray _src )
 #endif

    Mat src = _src.getMat();
-    CV_IPP_RUN(0 && (_src.dims() <= 2 || _src.isContinuous()), ipp_countNonZero(src, res), res);
+    CV_IPP_RUN_FAST(ipp_countNonZero(src, res), res);

    CountNonZeroFunc func = getCountNonZeroTab(src.depth());
    CV_Assert( func != 0 );
@ -2373,109 +2394,273 @@ static bool openvx_minMaxIdx(Mat &src, double* minVal, double* maxVal, int* minI
 #endif

 #ifdef HAVE_IPP
-static bool ipp_minMaxIdx( Mat &src, double* minVal, double* maxVal, int* minIdx, int* maxIdx, Mat &mask)
+static IppStatus ipp_minMaxIndex_wrap(const void* pSrc, int srcStep, IppiSize size, IppDataType dataType, // forwards to the unmasked ippiMinMaxIndx_*_C1R variant for dataType
+    float* pMinVal, float* pMaxVal, IppiPoint* pMinIndex, IppiPoint* pMaxIndex, const Ipp8u*, int) // the trailing mask pointer/step parameters are unnamed and unused
 {
-    CV_INSTRUMENT_REGION_IPP()
+    switch(dataType)
+    {
+    case ipp8u:  return CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_8u_C1R, (const Ipp8u*)pSrc, srcStep, size, pMinVal, pMaxVal, pMinIndex, pMaxIndex);
+    case ipp16u: return CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_16u_C1R, (const Ipp16u*)pSrc, srcStep, size, pMinVal, pMaxVal, pMinIndex, pMaxIndex);
+    case ipp32f: return CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_32f_C1R, (const Ipp32f*)pSrc, srcStep, size, pMinVal, pMaxVal, pMinIndex, pMaxIndex);
+    default:     return ippStsDataTypeErr; // every other data type is reported as an error status
+    }
+}

-#if IPP_VERSION_X100 >= 700
-    int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
-    size_t total_size = src.total();
-    int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
-    if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
+static IppStatus ipp_minMaxIndexMask_wrap(const void* pSrc, int srcStep, IppiSize size, IppDataType dataType, // forwards to the masked ippiMinMaxIndx_*_C1MR variant for dataType
+    float* pMinVal, float* pMaxVal, IppiPoint* pMinIndex, IppiPoint* pMaxIndex, const Ipp8u* pMask, int maskStep)
+{
+    switch(dataType)
     {
-        IppiSize sz = { cols * cn, rows };
+    case ipp8u:  return CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_8u_C1MR, (const Ipp8u*)pSrc, srcStep, pMask, maskStep, size, pMinVal, pMaxVal, pMinIndex, pMaxIndex);
+    case ipp16u: return CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_16u_C1MR, (const Ipp16u*)pSrc, srcStep, pMask, maskStep, size, pMinVal, pMaxVal, pMinIndex, pMaxIndex);
+    case ipp32f: return CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_32f_C1MR, (const Ipp32f*)pSrc, srcStep, pMask, maskStep, size, pMinVal, pMaxVal, pMinIndex, pMaxIndex);
+    default:     return ippStsDataTypeErr; // every other data type is reported as an error status
+    }
+}

-        if( !mask.empty() )
-        {
-            typedef IppStatus (CV_STDCALL* ippiMaskMinMaxIndxFuncC1)(const void *, int, const void *, int,
-                                                                        IppiSize, Ipp32f *, Ipp32f *, IppiPoint *, IppiPoint *);
-
-            CV_SUPPRESS_DEPRECATED_START
-            ippiMaskMinMaxIndxFuncC1 ippiMinMaxIndx_C1MR =
-                type == CV_8UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1MR :
-#if IPP_VERSION_X100 < 900
-                type == CV_8SC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_8s_C1MR :
+static IppStatus ipp_minMax_wrap(const void* pSrc, int srcStep, IppiSize size, IppDataType dataType, // values-only min/max via ippiMinMax_*_C1R, results widened to float
+    float* pMinVal, float* pMaxVal, IppiPoint*, IppiPoint*, const Ipp8u*, int) // index and mask parameters are unnamed and unused
+{
+    IppStatus status;
+
+    switch(dataType)
+    {
+#if IPP_VERSION_X100 > 201701 // wrong min values
+    case ipp8u:
+    {
+        Ipp8u val[2]; // val[0] receives the minimum, val[1] the maximum, in the native element type
+        status = CV_INSTRUMENT_FUN_IPP(ippiMinMax_8u_C1R, (const Ipp8u*)pSrc, srcStep, size, &val[0], &val[1]);
+        *pMinVal = val[0];
+        *pMaxVal = val[1];
+        return status;
+    }
 #endif
-                type == CV_16UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1MR :
-                type == CV_32FC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1MR : 0;
-            CV_SUPPRESS_DEPRECATED_END
+    case ipp16u:
+    {
+        Ipp16u val[2];
+        status = CV_INSTRUMENT_FUN_IPP(ippiMinMax_16u_C1R, (const Ipp16u*)pSrc, srcStep, size, &val[0], &val[1]);
+        *pMinVal = val[0];
+        *pMaxVal = val[1];
+        return status;
+    }
+    case ipp16s:
+    {
+        Ipp16s val[2];
+        status = CV_INSTRUMENT_FUN_IPP(ippiMinMax_16s_C1R, (const Ipp16s*)pSrc, srcStep, size, &val[0], &val[1]);
+        *pMinVal = val[0];
+        *pMaxVal = val[1];
+        return status;
+    }
+    case ipp32f: return CV_INSTRUMENT_FUN_IPP(ippiMinMax_32f_C1R, (const Ipp32f*)pSrc, srcStep, size, pMinVal, pMaxVal); // float results are written straight into the outputs
+    default:     return ipp_minMaxIndex_wrap(pSrc, srcStep, size, dataType, pMinVal, pMaxVal, NULL, NULL, NULL, 0); // remaining types go to the index wrapper with NULL index outputs
+    }
+}

-            if( ippiMinMaxIndx_C1MR )
+static IppStatus ipp_minIdx_wrap(const void* pSrc, int srcStep, IppiSize size, IppDataType dataType, // minimum value and its position via ippiMinIndx_*_C1R
+    float* pMinVal, float*, IppiPoint* pMinIndex, IppiPoint*, const Ipp8u*, int) // max outputs and mask parameters are unnamed and unused
+{
+    IppStatus status;
+
+    switch(dataType)
+    {
+    case ipp8u:
+    {
+        Ipp8u val; // native-typed result, widened to float after the call
+        status = CV_INSTRUMENT_FUN_IPP(ippiMinIndx_8u_C1R, (const Ipp8u*)pSrc, srcStep, size, &val, &pMinIndex->x, &pMinIndex->y);
+        *pMinVal = val;
+        return status;
+    }
+    case ipp16u:
+    {
+        Ipp16u val;
+        status = CV_INSTRUMENT_FUN_IPP(ippiMinIndx_16u_C1R, (const Ipp16u*)pSrc, srcStep, size, &val, &pMinIndex->x, &pMinIndex->y);
+        *pMinVal = val;
+        return status;
+    }
+    case ipp16s:
+    {
+        Ipp16s val;
+        status = CV_INSTRUMENT_FUN_IPP(ippiMinIndx_16s_C1R, (const Ipp16s*)pSrc, srcStep, size, &val, &pMinIndex->x, &pMinIndex->y);
+        *pMinVal = val;
+        return status;
+    }
+    case ipp32f: return CV_INSTRUMENT_FUN_IPP(ippiMinIndx_32f_C1R, (const Ipp32f*)pSrc, srcStep, size, pMinVal, &pMinIndex->x, &pMinIndex->y);
+    default:     return ipp_minMaxIndex_wrap(pSrc, srcStep, size, dataType, pMinVal, NULL, pMinIndex, NULL, NULL, 0); // remaining types go to the combined wrapper with NULL max outputs
+    }
+}
+
+static IppStatus ipp_maxIdx_wrap(const void* pSrc, int srcStep, IppiSize size, IppDataType dataType, // maximum value and its position via ippiMaxIndx_*_C1R
+    float*, float* pMaxVal, IppiPoint*, IppiPoint* pMaxIndex, const Ipp8u*, int) // min outputs and mask parameters are unnamed and unused
+{
+    IppStatus status;
+
+    switch(dataType)
+    {
+    case ipp8u:
+    {
+        Ipp8u val; // native-typed result, widened to float after the call
+        status = CV_INSTRUMENT_FUN_IPP(ippiMaxIndx_8u_C1R, (const Ipp8u*)pSrc, srcStep, size, &val, &pMaxIndex->x, &pMaxIndex->y);
+        *pMaxVal = val;
+        return status;
+    }
+    case ipp16u:
+    {
+        Ipp16u val;
+        status = CV_INSTRUMENT_FUN_IPP(ippiMaxIndx_16u_C1R, (const Ipp16u*)pSrc, srcStep, size, &val, &pMaxIndex->x, &pMaxIndex->y);
+        *pMaxVal = val;
+        return status;
+    }
+    case ipp16s:
+    {
+        Ipp16s val;
+        status = CV_INSTRUMENT_FUN_IPP(ippiMaxIndx_16s_C1R, (const Ipp16s*)pSrc, srcStep, size, &val, &pMaxIndex->x, &pMaxIndex->y);
+        *pMaxVal = val;
+        return status;
+    }
+    case ipp32f: return CV_INSTRUMENT_FUN_IPP(ippiMaxIndx_32f_C1R, (const Ipp32f*)pSrc, srcStep, size, pMaxVal, &pMaxIndex->x, &pMaxIndex->y);
+    default:     return ipp_minMaxIndex_wrap(pSrc, srcStep, size, dataType, NULL, pMaxVal, NULL, pMaxIndex, NULL, 0); // remaining types go to the combined wrapper with NULL min outputs
+    }
+}
+
+typedef IppStatus (*IppMinMaxSelector)(const void* pSrc, int srcStep, IppiSize size, IppDataType dataType, // function-pointer type with the parameter list shared by the ipp_*_wrap helpers
+    float* pMinVal, float* pMaxVal, IppiPoint* pMinIndex, IppiPoint* pMaxIndex, const Ipp8u* pMask, int maskStep);
+
+static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minIdx, int* _maxIdx, Mat &mask) // IPP path for minMaxIdx; any output pointer may be NULL; false means the IPP path did not produce a result
+{
+#if IPP_VERSION_X100 >= 700
+    CV_INSTRUMENT_REGION_IPP()
+
+    IppStatus   status;
+    IppDataType dataType = ippiGetDataType(src.depth());
+    float       minVal = 0;
+    float       maxVal = 0;
+    IppiPoint   minIdx = {-1, -1};
+    IppiPoint   maxIdx = {-1, -1};
+
+    float       *pMinVal = (_minVal)?&minVal:NULL; // IPP writes into the float/IppiPoint locals; NULL is passed for outputs the caller did not request
+    float       *pMaxVal = (_maxVal)?&maxVal:NULL;
+    IppiPoint   *pMinIdx = (_minIdx)?&minIdx:NULL;
+    IppiPoint   *pMaxIdx = (_maxIdx)?&maxIdx:NULL;
+
+    IppMinMaxSelector ippMinMaxFun = ipp_minMaxIndexMask_wrap; // default: masked variant
+    if(mask.empty())
+    {
+        if(_maxVal && _maxIdx && !_minVal && !_minIdx) // without a mask, pick the narrowest wrapper that covers the requested outputs
+            ippMinMaxFun = ipp_maxIdx_wrap;
+        else if(!_maxVal && !_maxIdx && _minVal && _minIdx)
+            ippMinMaxFun = ipp_minIdx_wrap;
+        else if(_maxVal && !_maxIdx && _minVal && !_minIdx)
+            ippMinMaxFun = ipp_minMax_wrap;
+        else
+            ippMinMaxFun = ipp_minMaxIndex_wrap;
+    }
+
+    if(src.dims <= 2)
+    {
+        IppiSize size = ippiSize(src.size());
+        size.width *= src.channels();
+
+        status = ippMinMaxFun(src.ptr(), (int)src.step, size, dataType, pMinVal, pMaxVal, pMinIdx, pMaxIdx, (Ipp8u*)mask.ptr(), (int)mask.step);
+        if(status < 0 || status == ippStsNoOperation)
+            return false;
+        if(_minVal)
+            *_minVal = minVal;
+        if(_maxVal)
+            *_maxVal = maxVal;
+        if(_minIdx)
+        {
+            if(!mask.empty() && !minIdx.y && !minIdx.x && !mask.ptr()[0]) // (0,0) with a cleared first mask element cannot be a real hit; with the element set, (0,0) is a valid position
             {
-                Ipp32f min, max;
-                IppiPoint minp, maxp;
-                if( CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_C1MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &min, &max, &minp, &maxp) >= 0 )
-                {
-                    if( minVal )
-                        *minVal = (double)min;
-                    if( maxVal )
-                        *maxVal = (double)max;
-                    if( !minp.x && !minp.y && !maxp.x && !maxp.y && !mask.ptr()[0] )
-                        minp.x = maxp.x = -1;
-                    if( minIdx )
-                    {
-                        size_t minidx = minp.y * cols + minp.x + 1;
-                        ofs2idx(src, minidx, minIdx);
-                    }
-                    if( maxIdx )
-                    {
-                        size_t maxidx = maxp.y * cols + maxp.x + 1;
-                        ofs2idx(src, maxidx, maxIdx);
-                    }
-                    return true;
-                }
+                _minIdx[0] = -1;
+                _minIdx[1] = -1;
+            }
+            else
+            {
+                _minIdx[0] = minIdx.y; // output order is (row, column)
+                _minIdx[1] = minIdx.x;
             }
         }
-        else
+        if(_maxIdx)
         {
-            typedef IppStatus (CV_STDCALL* ippiMinMaxIndxFuncC1)(const void *, int, IppiSize, Ipp32f *, Ipp32f *, IppiPoint *, IppiPoint *);
-
-            CV_SUPPRESS_DEPRECATED_START
-            ippiMinMaxIndxFuncC1 ippiMinMaxIndx_C1R =
-#if IPP_VERSION_X100 != 900 // bug in 9.0.0 avx2 optimization
-                depth == CV_8U ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1R :
-#endif
-#if IPP_VERSION_X100 < 900
-                depth == CV_8S ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_8s_C1R :
-#endif
-                depth == CV_16U ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1R :
-#if IPP_DISABLE_BLOCK && !((defined _MSC_VER && defined _M_IX86) || defined __i386__)
-                // See bug #4955: the function fails with SEGFAULT when the source matrix contains NANs
-                // IPPICV version is 9.0.1.
-                depth == CV_32F ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1R :
+            if(!mask.empty() && !maxIdx.y && !maxIdx.x && !mask.ptr()[0]) // same empty-selection test as for the minimum
+            {
+                _maxIdx[0] = -1;
+                _maxIdx[1] = -1;
+            }
+            else
+            {
+                _maxIdx[0] = maxIdx.y;
+                _maxIdx[1] = maxIdx.x;
+            }
+        }
+    }
+    else
+    {
+        const Mat *arrays[] = {&src, mask.empty()?NULL:&mask, NULL};
+        uchar     *ptrs[3]  = {NULL}; // ptrs[0] = current src plane, ptrs[1] = current mask plane (stays NULL without a mask)
+        NAryMatIterator it(arrays, ptrs);
+        IppiSize size = ippiSize(it.size*src.channels(), 1); // every plane is handed to IPP as a single row
+        int srcStep      = (int)(size.width*src.elemSize1());
+        int maskStep     = size.width;
+        size_t idxPos    = 1; // 1-based linear offset of the current plane's first element; offset 0 means "nothing found"
+        size_t minIdxAll = 0;
+        size_t maxIdxAll = 0;
+        float  minValAll = IPP_MAXABS_32F;
+        float  maxValAll = -IPP_MAXABS_32F;
+
+        for(size_t i = 0; i < it.nplanes; i++, ++it, idxPos += size.width)
+        {
+            status = ippMinMaxFun(ptrs[0], srcStep, size, dataType, pMinVal, pMaxVal, pMinIdx, pMaxIdx, ptrs[1], maskStep);
+            if(status < 0)
+                return false;
+#if IPP_VERSION_X100 > 201701
+            // Zero-mask check, function should return ippStsNoOperation warning
+            if(status == ippStsNoOperation)
+                    continue;
+#else
+            // Crude zero-mask check, waiting for fix in IPP function
+            if(ptrs[1])
+            {
+                Mat localMask(Size(size.width, 1), CV_8U, ptrs[1], maskStep);
+                if(!cv::countNonZero(localMask))
+                    continue;
+            }
 #endif
-                0;
-            CV_SUPPRESS_DEPRECATED_END

-            if( ippiMinMaxIndx_C1R )
+            if(_minVal && minVal < minValAll) // NOTE(review): the running index is only tracked when _minVal is requested - confirm behaviour for callers asking for indices alone
             {
-                Ipp32f min, max;
-                IppiPoint minp, maxp;
-                if( CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_C1R, src.ptr(), (int)src.step[0], sz, &min, &max, &minp, &maxp) >= 0 )
-                {
-                    if( minVal )
-                        *minVal = (double)min;
-                    if( maxVal )
-                        *maxVal = (double)max;
-                    if( minIdx )
-                    {
-                        size_t minidx = minp.y * cols + minp.x + 1;
-                        ofs2idx(src, minidx, minIdx);
-                    }
-                    if( maxIdx )
-                    {
-                        size_t maxidx = maxp.y * cols + maxp.x + 1;
-                        ofs2idx(src, maxidx, maxIdx);
-                    }
-                    return true;
-                }
+                minValAll = minVal;
+                minIdxAll = idxPos+minIdx.x;
+            }
+            if(_maxVal && maxVal > maxValAll)
+            {
+                maxValAll = maxVal;
+                maxIdxAll = idxPos+maxIdx.x;
             }
         }
+        if(!src.empty() && mask.empty())
+        {
+            if(minIdxAll == 0) // unmasked, non-empty input: default an unset offset to the first element
+                minIdxAll = 1;
+            if(maxIdxAll == 0) // must test and set the index; the value maxValAll must not be altered here
+                maxIdxAll = 1;
+        }
+
+        if(_minVal)
+            *_minVal = minValAll;
+        if(_maxVal)
+            *_maxVal = maxValAll;
+        if(_minIdx)
+            ofs2idx(src, minIdxAll, _minIdx);
+        if(_maxIdx)
+            ofs2idx(src, maxIdxAll, _maxIdx);
     }
+
+    return true;
 #else
-#endif
-    CV_UNUSED(src); CV_UNUSED(minVal); CV_UNUSED(maxVal); CV_UNUSED(minIdx); CV_UNUSED(maxIdx); CV_UNUSED(mask);
+    CV_UNUSED(src); CV_UNUSED(_minVal); CV_UNUSED(_maxVal); CV_UNUSED(_minIdx); CV_UNUSED(_maxIdx); CV_UNUSED(mask);
     return false;
+#endif
 }
 #endif

@ -2499,7 +2684,7 @@ void cv::minMaxIdx(InputArray _src, double* minVal,
    CV_OVX_RUN(true,
               openvx_minMaxIdx(src, minVal, maxVal, minIdx, maxIdx, mask))

-    CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_minMaxIdx(src, minVal, maxVal, minIdx, maxIdx, mask))
+    CV_IPP_RUN_FAST(ipp_minMaxIdx(src, minVal, maxVal, minIdx, maxIdx, mask))

    MinMaxIdxFunc func = getMinmaxTab(depth);
    CV_Assert( func != 0 );
@ -2837,42 +3022,31 @@ static bool ipp_norm(Mat &src, int normType, Mat &mask, double &result)
    CV_INSTRUMENT_REGION_IPP()

 #if IPP_VERSION_X100 >= 700
-    int cn = src.channels();
    size_t total_size = src.total();
    int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;

    if( (src.dims == 2 || (src.isContinuous() && mask.isContinuous()))
-        && cols > 0 && (size_t)rows*cols == total_size
-        && (normType == NORM_INF || normType == NORM_L1 ||
-            normType == NORM_L2 || normType == NORM_L2SQR) )
+        && cols > 0 && (size_t)rows*cols == total_size )
    {
-        IppiSize sz = { cols, rows };
-        int type = src.type();
        if( !mask.empty() )
        {
+            IppiSize sz = { cols, rows };
+            int type = src.type();
+
            typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
            ippiMaskNormFuncC1 ippiNorm_C1MR =
                normType == NORM_INF ?
                (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8u_C1MR :
-#if IPP_VERSION_X100 < 900
-                type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8s_C1MR :
-#endif
-//                type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_16u_C1MR :
+                type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_16u_C1MR :
                type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_32f_C1MR :
                0) :
            normType == NORM_L1 ?
                (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8u_C1MR :
-#if IPP_VERSION_X100 < 900
-                type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8s_C1MR :
-#endif
                type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_16u_C1MR :
                type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_32f_C1MR :
                0) :
            normType == NORM_L2 || normType == NORM_L2SQR ?
                (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8u_C1MR :
-#if IPP_VERSION_X100 < 900
-                type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8s_C1MR :
-#endif
                type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_16u_C1MR :
                type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_32f_C1MR :
                0) : 0;
@ -2885,39 +3059,29 @@ static bool ipp_norm(Mat &src, int normType, Mat &mask, double &result)
                    return true;
                }
            }
-#if IPP_DISABLE_BLOCK
            typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
            ippiMaskNormFuncC3 ippiNorm_C3CMR =
                normType == NORM_INF ?
                (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8u_C3CMR :
-#if IPP_VERSION_X100 < 900
-                type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8s_C3CMR :
-#endif
                type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_16u_C3CMR :
                type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_32f_C3CMR :
                0) :
            normType == NORM_L1 ?
                (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8u_C3CMR :
-#if IPP_VERSION_X100 < 900
-                type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8s_C3CMR :
-#endif
                type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_16u_C3CMR :
                type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_32f_C3CMR :
                0) :
            normType == NORM_L2 || normType == NORM_L2SQR ?
                (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8u_C3CMR :
-#if IPP_VERSION_X100 < 900
-                type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8s_C3CMR :
-#endif
                type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_16u_C3CMR :
                type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_32f_C3CMR :
                0) : 0;
            if( ippiNorm_C3CMR )
            {
                Ipp64f norm1, norm2, norm3;
-                if( CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, (src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1)) >= 0 &&
-                    CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, (src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2)) >= 0 &&
-                    CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, (src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3)) >= 0)
+                if( CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 &&
+                    CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 &&
+                    CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0)
                {
                    Ipp64f norm =
                        normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) :
@ -2928,81 +3092,46 @@ static bool ipp_norm(Mat &src, int normType, Mat &mask, double &result)
                    return true;
                }
            }
-#endif
        }
        else
        {
+            IppiSize sz = { cols*src.channels(), rows };
+            int type = src.depth();
+
            typedef IppStatus (CV_STDCALL* ippiNormFuncHint)(const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
            typedef IppStatus (CV_STDCALL* ippiNormFuncNoHint)(const void *, int, IppiSize, Ipp64f *);
            ippiNormFuncHint ippiNormHint =
                normType == NORM_L1 ?
                (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L1_32f_C1R :
-                type == CV_32FC3 ? (ippiNormFuncHint)ippiNorm_L1_32f_C3R :
-                type == CV_32FC4 ? (ippiNormFuncHint)ippiNorm_L1_32f_C4R :
                0) :
                normType == NORM_L2 || normType == NORM_L2SQR ?
                (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L2_32f_C1R :
-                type == CV_32FC3 ? (ippiNormFuncHint)ippiNorm_L2_32f_C3R :
-                type == CV_32FC4 ? (ippiNormFuncHint)ippiNorm_L2_32f_C4R :
                0) : 0;
            ippiNormFuncNoHint ippiNorm =
                normType == NORM_INF ?
                (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C1R :
-                type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C3R :
-                type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C4R :
                type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C1R :
-                type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C3R :
-                type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C4R :
                type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C1R :
-#if (IPP_VERSION_X100 >= 810)
-                type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
-                type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
-#endif
                type == CV_32FC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C1R :
-                type == CV_32FC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C3R :
-                type == CV_32FC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C4R :
                0) :
                normType == NORM_L1 ?
                (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C1R :
-                type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C3R :
-                type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C4R :
                type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C1R :
-                type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C3R :
-                type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C4R :
                type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C1R :
-                type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C3R :
-                type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C4R :
                0) :
                normType == NORM_L2 || normType == NORM_L2SQR ?
                (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C1R :
-                type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C3R :
-                type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C4R :
                type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C1R :
-                type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C3R :
-                type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C4R :
                type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C1R :
-                type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C3R :
-                type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C4R :
                0) : 0;
-            // Make sure only zero or one version of the function pointer is valid
-            CV_Assert(!ippiNormHint || !ippiNorm);
            if( ippiNormHint || ippiNorm )
            {
-                Ipp64f norm_array[4];
-                IppStatus ret = ippiNormHint ? CV_INSTRUMENT_FUN_IPP(ippiNormHint, src.ptr(), (int)src.step[0], sz, norm_array, ippAlgHintAccurate) :
-                                CV_INSTRUMENT_FUN_IPP(ippiNorm, src.ptr(), (int)src.step[0], sz, norm_array);
+                Ipp64f norm;
+                IppStatus ret = ippiNormHint ? CV_INSTRUMENT_FUN_IPP(ippiNormHint, src.ptr(), (int)src.step[0], sz, &norm, ippAlgHintAccurate) :
+                                CV_INSTRUMENT_FUN_IPP(ippiNorm, src.ptr(), (int)src.step[0], sz, &norm);
                if( ret >= 0 )
                {
-                    Ipp64f norm = (normType == NORM_L2 || normType == NORM_L2SQR) ? norm_array[0] * norm_array[0] : norm_array[0];
-                    for( int i = 1; i < cn; i++ )
-                    {
-                        norm =
-                            normType == NORM_INF ? std::max(norm, norm_array[i]) :
-                            normType == NORM_L1 ? norm + norm_array[i] :
-                            normType == NORM_L2 || normType == NORM_L2SQR ? norm + norm_array[i] * norm_array[i] :
-                            0;
-                    }
-                    result = (normType == NORM_L2 ? (double)std::sqrt(norm) : (double)norm);
+                    result = (normType == NORM_L2SQR) ? norm * norm : norm;
                    return true;
                }
            }
@ -3248,53 +3377,38 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra
    if( normType & CV_RELATIVE )
    {
        normType &= NORM_TYPE_MASK;
-        CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR ||
-                ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
+
        size_t total_size = src1.total();
        int rows = src1.size[0], cols = rows ? (int)(total_size/rows) : 0;
        if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
-            && cols > 0 && (size_t)rows*cols == total_size
-            && (normType == NORM_INF || normType == NORM_L1 ||
-                normType == NORM_L2 || normType == NORM_L2SQR) )
+            && cols > 0 && (size_t)rows*cols == total_size )
        {
-            IppiSize sz = { cols, rows };
-            int type = src1.type();
            if( !mask.empty() )
            {
-                typedef IppStatus (CV_STDCALL* ippiMaskNormRelFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
-                ippiMaskNormRelFuncC1 ippiNormDiff_C1MR =
+                IppiSize sz = { cols, rows };
+                int type = src1.type();
+
+                typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
+                ippiMaskNormDiffFuncC1 ippiNormRel_C1MR =
                    normType == NORM_INF ?
-                    (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8u_C1MR :
-#if IPP_VERSION_X100 < 900
-#ifndef __APPLE__
-                    type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8s_C1MR :
-#endif
-#endif
-                    type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_16u_C1MR :
-                    type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_32f_C1MR :
+                    (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_Inf_8u_C1MR :
+                    type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_Inf_16u_C1MR :
+                    type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_Inf_32f_C1MR :
                    0) :
                    normType == NORM_L1 ?
-                    (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8u_C1MR :
-#if IPP_VERSION_X100 < 900
-#ifndef __APPLE__
-                    type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8s_C1MR :
-#endif
-#endif
-                    type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_16u_C1MR :
-                    type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_32f_C1MR :
+                    (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L1_8u_C1MR :
+                    type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L1_16u_C1MR :
+                    type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L1_32f_C1MR :
                    0) :
                    normType == NORM_L2 || normType == NORM_L2SQR ?
-                    (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8u_C1MR :
-#if IPP_VERSION_X100 < 900
-                    type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8s_C1MR :
-#endif
-                    type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_16u_C1MR :
-                    type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_32f_C1MR :
+                    (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L2_8u_C1MR :
+                    type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L2_16u_C1MR :
+                    type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L2_32f_C1MR :
                    0) : 0;
-                if( ippiNormDiff_C1MR )
+                if( ippiNormRel_C1MR )
                {
                    Ipp64f norm;
-                    if( CV_INSTRUMENT_FUN_IPP(ippiNormDiff_C1MR, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 )
+                    if( CV_INSTRUMENT_FUN_IPP(ippiNormRel_C1MR, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 )
                    {
                        result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
                        return true;
@ -3303,47 +3417,43 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra
            }
            else
            {
-                typedef IppStatus (CV_STDCALL* ippiNormRelFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *);
+                IppiSize sz = { cols*src1.channels(), rows };
+                int type = src1.depth();
+
                typedef IppStatus (CV_STDCALL* ippiNormRelFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
-                ippiNormRelFuncNoHint ippiNormDiff =
-                    normType == NORM_INF ?
-                    (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_8u_C1R :
-                    type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16u_C1R :
-                    type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16s_C1R :
-                    type == CV_32FC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_32f_C1R :
-                    0) :
+                typedef IppStatus (CV_STDCALL* ippiNormRelFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *);
+                ippiNormRelFuncHint ippiNormRelHint =
                    normType == NORM_L1 ?
-                    (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_8u_C1R :
-                    type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16u_C1R :
-                    type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16s_C1R :
+                    (type == CV_32F ? (ippiNormRelFuncHint)ippiNormRel_L1_32f_C1R :
                    0) :
                    normType == NORM_L2 || normType == NORM_L2SQR ?
-                    (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_8u_C1R :
-                    type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16u_C1R :
-                    type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16s_C1R :
+                    (type == CV_32F ? (ippiNormRelFuncHint)ippiNormRel_L2_32f_C1R :
                    0) : 0;
-                ippiNormRelFuncHint ippiNormDiffHint =
+                ippiNormRelFuncNoHint ippiNormRel =
+                    normType == NORM_INF ?
+                    (type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_8u_C1R :
+                    type == CV_16U ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16u_C1R :
+                    type == CV_16S ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16s_C1R :
+                    type == CV_32F ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_32f_C1R :
+                    0) :
                    normType == NORM_L1 ?
-                    (type == CV_32FC1 ? (ippiNormRelFuncHint)ippiNormRel_L1_32f_C1R :
+                    (type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_L1_8u_C1R :
+                    type == CV_16U ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16u_C1R :
+                    type == CV_16S ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16s_C1R :
                    0) :
                    normType == NORM_L2 || normType == NORM_L2SQR ?
-                    (type == CV_32FC1 ? (ippiNormRelFuncHint)ippiNormRel_L2_32f_C1R :
+                    (type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_L2_8u_C1R :
+                    type == CV_16U ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16u_C1R :
+                    type == CV_16S ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16s_C1R :
                    0) : 0;
-                if (ippiNormDiff)
-                {
-                    Ipp64f norm;
-                    if( CV_INSTRUMENT_FUN_IPP(ippiNormDiff, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm) >= 0 )
-                    {
-                        result = (double)norm;
-                        return true;
-                    }
-                }
-                if (ippiNormDiffHint)
+                if( ippiNormRelHint || ippiNormRel )
                {
                    Ipp64f norm;
-                    if( CV_INSTRUMENT_FUN_IPP(ippiNormDiffHint, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm, ippAlgHintAccurate) >= 0 )
+                    IppStatus ret = ippiNormRelHint ? CV_INSTRUMENT_FUN_IPP(ippiNormRelHint, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm, ippAlgHintAccurate) :
+                                    CV_INSTRUMENT_FUN_IPP(ippiNormRel, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm);
+                    if( ret >= 0 )
                    {
-                        result = (double)norm;
+                        result = (normType == NORM_L2SQR) ? norm * norm : norm;
                        return true;
                    }
                }
@ -3352,47 +3462,32 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra
        return false;
    }

-    normType &= 7;
-    CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
-               normType == NORM_L2 || normType == NORM_L2SQR ||
-              ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
+    normType &= NORM_TYPE_MASK;

    size_t total_size = src1.total();
    int rows = src1.size[0], cols = rows ? (int)(total_size/rows) : 0;
    if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
-        && cols > 0 && (size_t)rows*cols == total_size
-        && (normType == NORM_INF || normType == NORM_L1 ||
-            normType == NORM_L2 || normType == NORM_L2SQR) )
+        && cols > 0 && (size_t)rows*cols == total_size )
    {
-        IppiSize sz = { cols, rows };
-        int type = src1.type();
        if( !mask.empty() )
        {
+            IppiSize sz = { cols, rows };
+            int type = src1.type();
+
            typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
            ippiMaskNormDiffFuncC1 ippiNormDiff_C1MR =
                normType == NORM_INF ?
                (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8u_C1MR :
-#if IPP_VERSION_X100 < 900
-                type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8s_C1MR :
-#endif
                type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_16u_C1MR :
                type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_32f_C1MR :
                0) :
                normType == NORM_L1 ?
                (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8u_C1MR :
-#if IPP_VERSION_X100 < 900
-#ifndef __APPLE__
-                type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8s_C1MR :
-#endif
-#endif
                type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_16u_C1MR :
                type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_32f_C1MR :
                0) :
                normType == NORM_L2 || normType == NORM_L2SQR ?
                (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8u_C1MR :
-#if IPP_VERSION_X100 < 900
-                type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8s_C1MR :
-#endif
                type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_16u_C1MR :
                type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_32f_C1MR :
                0) : 0;
@ -3405,30 +3500,20 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra
                    return true;
                }
            }
-#ifndef __APPLE__
            typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC3)(const void *, int, const void *, int, const void *, int, IppiSize, int, Ipp64f *);
            ippiMaskNormDiffFuncC3 ippiNormDiff_C3CMR =
                normType == NORM_INF ?
                (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8u_C3CMR :
-#if IPP_VERSION_X100 < 900
-                type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8s_C3CMR :
-#endif
                type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_16u_C3CMR :
                type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_32f_C3CMR :
                0) :
                normType == NORM_L1 ?
                (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8u_C3CMR :
-#if IPP_VERSION_X100 < 900
-                type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8s_C3CMR :
-#endif
                type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_16u_C3CMR :
                type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_32f_C3CMR :
                0) :
                normType == NORM_L2 || normType == NORM_L2SQR ?
                (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8u_C3CMR :
-#if IPP_VERSION_X100 < 900
-                type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8s_C3CMR :
-#endif
                type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_16u_C3CMR :
                type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_32f_C3CMR :
                0) : 0;
@ -3448,83 +3533,46 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra
                    return true;
                }
            }
-#endif
        }
        else
        {
+            IppiSize sz = { cols*src1.channels(), rows };
+            int type = src1.depth();
+
            typedef IppStatus (CV_STDCALL* ippiNormDiffFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
            typedef IppStatus (CV_STDCALL* ippiNormDiffFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *);
            ippiNormDiffFuncHint ippiNormDiffHint =
                normType == NORM_L1 ?
-                (type == CV_32FC1 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C1R :
-                type == CV_32FC3 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C3R :
-                type == CV_32FC4 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C4R :
+                (type == CV_32F ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C1R :
                0) :
                normType == NORM_L2 || normType == NORM_L2SQR ?
-                (type == CV_32FC1 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C1R :
-                type == CV_32FC3 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C3R :
-                type == CV_32FC4 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C4R :
+                (type == CV_32F ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C1R :
                0) : 0;
            ippiNormDiffFuncNoHint ippiNormDiff =
                normType == NORM_INF ?
-                (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C1R :
-                type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C3R :
-                type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C4R :
-                type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C1R :
-                type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C3R :
-                type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C4R :
-                type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C1R :
-#if (IPP_VERSION_X100 >= 810)
-                type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
-                type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
-#endif
-                type == CV_32FC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C1R :
-                type == CV_32FC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C3R :
-                type == CV_32FC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C4R :
+                (type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C1R :
+                type == CV_16U ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C1R :
+                type == CV_16S ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C1R :
+                type == CV_32F ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C1R :
                0) :
                normType == NORM_L1 ?
-                (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C1R :
-                type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C3R :
-                type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C4R :
-                type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C1R :
-                type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C3R :
-                type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C4R :
-#if !(IPP_VERSION_X100 == 820 || IPP_VERSION_X100 == 821) // Oct 2014: Accuracy issue with IPP 8.2 / 8.2.1
-                type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C1R :
-#endif
-                type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C3R :
-                type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C4R :
+                (type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C1R :
+                type == CV_16U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C1R :
+                type == CV_16S ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C1R :
                0) :
                normType == NORM_L2 || normType == NORM_L2SQR ?
-                (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C1R :
-                type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C3R :
-                type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C4R :
-                type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C1R :
-                type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C3R :
-                type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C4R :
-                type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C1R :
-                type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C3R :
-                type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C4R :
+                (type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C1R :
+                type == CV_16U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C1R :
+                type == CV_16S ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C1R :
                0) : 0;
-            // Make sure only zero or one version of the function pointer is valid
-            CV_Assert(!ippiNormDiffHint || !ippiNormDiff);
            if( ippiNormDiffHint || ippiNormDiff )
            {
-                Ipp64f norm_array[4];
-                IppStatus ret = ippiNormDiffHint ? CV_INSTRUMENT_FUN_IPP(ippiNormDiffHint, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, norm_array, ippAlgHintAccurate) :
-                                CV_INSTRUMENT_FUN_IPP(ippiNormDiff, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, norm_array);
+                Ipp64f norm;
+                IppStatus ret = ippiNormDiffHint ? CV_INSTRUMENT_FUN_IPP(ippiNormDiffHint, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm, ippAlgHintAccurate) :
+                                CV_INSTRUMENT_FUN_IPP(ippiNormDiff, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm);
                if( ret >= 0 )
                {
-                    Ipp64f norm = (normType == NORM_L2 || normType == NORM_L2SQR) ? norm_array[0] * norm_array[0] : norm_array[0];
-                    for( int i = 1; i < src1.channels(); i++ )
-                    {
-                        norm =
-                            normType == NORM_INF ? std::max(norm, norm_array[i]) :
-                            normType == NORM_L1 ? norm + norm_array[i] :
-                            normType == NORM_L2 || normType == NORM_L2SQR ? norm + norm_array[i] * norm_array[i] :
-                            0;
-                    }
-                    result = (normType == NORM_L2 ? (double)std::sqrt(norm) : (double)norm);
+                    result = (normType == NORM_L2SQR) ? norm * norm : norm;
                    return true;
                }
            }
--- a/modules/imgproc/src/canny.cpp
+++ b/modules/imgproc/src/canny.cpp
@ -51,14 +51,6 @@
 #pragma warning( disable: 4127 ) // conditional expression is constant
 #endif

-
-#if defined (HAVE_IPP) && (IPP_VERSION_X100 >= 700)
-#define USE_IPP_CANNY 1
-#else
-#define USE_IPP_CANNY 0
-#endif
-
-
 namespace cv
 {

@ -66,73 +58,79 @@ static void CannyImpl(Mat& dx_, Mat& dy_, Mat& _dst, double low_thresh, double h


 #ifdef HAVE_IPP
-template <bool useCustomDeriv>
-static bool ippCanny(const Mat& _src, const Mat& dx_, const Mat& dy_, Mat& _dst, float low, float high)
+static bool ipp_Canny(const Mat& src , const Mat& dx_, const Mat& dy_, Mat& dst, float low,  float high, bool L2gradient, int aperture_size)
 {
+#ifdef HAVE_IPP_IW
    CV_INSTRUMENT_REGION_IPP()

-#if USE_IPP_CANNY
-    if (!useCustomDeriv && _src.isSubmatrix())
-        return false; // IPP Sobel doesn't support transparent ROI border
+#if IPP_DISABLE_PERF_CANNY_MT
+    if(cv::getNumThreads()>1)
+        return false;
+#endif

-    int size = 0, size1 = 0;
-    IppiSize roi = { _src.cols, _src.rows };
+    ::ipp::IwiSize size(dst.cols, dst.rows);
+    IppDataType    type     = ippiGetDataType(dst.depth());
+    int            channels = dst.channels();
+    IppNormType    norm     = (L2gradient)?ippNormL2:ippNormL1;

-    if (ippiCannyGetSize(roi, &size) < 0)
+    if(size.width <= 3 || size.height <= 3)
        return false;

-    if (!useCustomDeriv)
-    {
-#if IPP_VERSION_X100 < 900
-        if (ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size1) < 0)
-            return false;
-        size = std::max(size, size1);
-        if (ippiFilterSobelHorizGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size1) < 0)
-            return false;
-#else
-        if (ippiFilterSobelNegVertBorderGetBufferSize(roi, ippMskSize3x3, ipp8u, ipp16s, 1, &size1) < 0)
-            return false;
-        size = std::max(size, size1);
-        if (ippiFilterSobelHorizBorderGetBufferSize(roi, ippMskSize3x3, ipp8u, ipp16s, 1, &size1) < 0)
-            return false;
-#endif
-        size = std::max(size, size1);
-    }
+    if(channels != 1)
+        return false;

-    AutoBuffer<uchar> buf(size + 64);
-    uchar* buffer = alignPtr((uchar*)buf, 32);
+    if(type != ipp8u)
+        return false;

-    Mat dx, dy;
-    if (!useCustomDeriv)
+    if(src.empty())
    {
-        Mat _dx(_src.rows, _src.cols, CV_16S);
-        if( CV_INSTRUMENT_FUN_IPP(ippiFilterSobelNegVertBorder_8u16s_C1R, _src.ptr(), (int)_src.step,
-                        _dx.ptr<short>(), (int)_dx.step, roi,
-                        ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 )
-            return false;
+        try
+        {
+            ::ipp::IwiImage iwSrcDx;
+            ::ipp::IwiImage iwSrcDy;
+            ::ipp::IwiImage iwDst;

-        Mat _dy(_src.rows, _src.cols, CV_16S);
-        if( CV_INSTRUMENT_FUN_IPP(ippiFilterSobelHorizBorder_8u16s_C1R, _src.ptr(), (int)_src.step,
-                        _dy.ptr<short>(), (int)_dy.step, roi,
-                        ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 )
-            return false;
+            ippiGetImage(dx_, iwSrcDx);
+            ippiGetImage(dy_, iwSrcDy);
+            ippiGetImage(dst, iwDst);

-        swap(dx, _dx);
-        swap(dy, _dy);
+            CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCannyDeriv, &iwSrcDx, &iwSrcDy, &iwDst, norm, low, high);
+        }
+        catch (::ipp::IwException ex)
+        {
+            return false;
+        }
    }
    else
    {
-        dx = dx_;
-        dy = dy_;
+        IppiMaskSize kernel;
+
+        if(aperture_size == 3)
+            kernel = ippMskSize3x3;
+        else if(aperture_size == 5)
+            kernel = ippMskSize5x5;
+        else
+            return false;
+
+        try
+        {
+            ::ipp::IwiImage iwSrc;
+            ::ipp::IwiImage iwDst;
+
+            ippiGetImage(src, iwSrc);
+            ippiGetImage(dst, iwDst);
+
+            CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCanny, &iwSrc, &iwDst, ippFilterSobel, kernel, norm, low, high, ippBorderRepl);
+        }
+        catch (::ipp::IwException)
+        {
+            return false;
+        }
    }

-    if( CV_INSTRUMENT_FUN_IPP(ippiCanny_16s8u_C1R, dx.ptr<short>(), (int)dx.step,
-                               dy.ptr<short>(), (int)dy.step,
-                              _dst.ptr(), (int)_dst.step, roi, low, high, buffer) < 0 )
-        return false;
    return true;
 #else
-    CV_UNUSED(_src); CV_UNUSED(dx_); CV_UNUSED(dy_); CV_UNUSED(_dst); CV_UNUSED(low); CV_UNUSED(high);
+    CV_UNUSED(src); CV_UNUSED(dx_); CV_UNUSED(dy_); CV_UNUSED(dst); CV_UNUSED(low); CV_UNUSED(high); CV_UNUSED(L2gradient); CV_UNUSED(aperture_size);
    return false;
 #endif
 }
@ -318,6 +316,8 @@ public:
        // In sobel transform we calculate ksize2 extra lines for the first and last rows of each slice
        // because IPPDerivSobel expects only isolated ROIs, in contrast with the opencv version which
        // uses the pixels outside of the ROI to form a border.
+        //
+        // TODO: statement above is not true anymore, so adjustments may be required
        int ksize2 = aperture_size / 2;
        // If Scharr filter: aperture_size is 3 and ksize2 is 1
        if(aperture_size == -1)
@ -882,18 +882,18 @@ void Canny( InputArray _src, OutputArray _dst,
        return;
 #endif

-    CV_IPP_RUN(USE_IPP_CANNY && (aperture_size == 3 && !L2gradient && 1 == cn), ippCanny<false>(src, Mat(), Mat(), dst, (float)low_thresh, (float)high_thresh))
+    CV_IPP_RUN_FAST(ipp_Canny(src, Mat(), Mat(), dst, (float)low_thresh, (float)high_thresh, L2gradient, aperture_size))

-if (L2gradient)
-{
-    low_thresh = std::min(32767.0, low_thresh);
-    high_thresh = std::min(32767.0, high_thresh);
+    if (L2gradient)
+    {
+        low_thresh = std::min(32767.0, low_thresh);
+        high_thresh = std::min(32767.0, high_thresh);

-    if (low_thresh > 0) low_thresh *= low_thresh;
-    if (high_thresh > 0) high_thresh *= high_thresh;
-}
-int low = cvFloor(low_thresh);
-int high = cvFloor(high_thresh);
+        if (low_thresh > 0) low_thresh *= low_thresh;
+        if (high_thresh > 0) high_thresh *= high_thresh;
+    }
+    int low = cvFloor(low_thresh);
+    int high = cvFloor(high_thresh);

    ptrdiff_t mapstep = src.cols + 2;
    AutoBuffer<uchar> buffer((src.cols+2)*(src.rows+2) + cn * mapstep * 3 * sizeof(int));
@ -938,15 +938,15 @@ int high = cvFloor(high_thresh);
    {
        m = borderPeaksParallel.front();
        borderPeaksParallel.pop();
-    if (!m[-1])         CANNY_PUSH_SERIAL(m - 1);
-    if (!m[1])          CANNY_PUSH_SERIAL(m + 1);
-    if (!m[-mapstep-1]) CANNY_PUSH_SERIAL(m - mapstep - 1);
-    if (!m[-mapstep])   CANNY_PUSH_SERIAL(m - mapstep);
-    if (!m[-mapstep+1]) CANNY_PUSH_SERIAL(m - mapstep + 1);
-    if (!m[mapstep-1])  CANNY_PUSH_SERIAL(m + mapstep - 1);
-    if (!m[mapstep])    CANNY_PUSH_SERIAL(m + mapstep);
-    if (!m[mapstep+1])  CANNY_PUSH_SERIAL(m + mapstep + 1);
-}
+        if (!m[-1])         CANNY_PUSH_SERIAL(m - 1);
+        if (!m[1])          CANNY_PUSH_SERIAL(m + 1);
+        if (!m[-mapstep-1]) CANNY_PUSH_SERIAL(m - mapstep - 1);
+        if (!m[-mapstep])   CANNY_PUSH_SERIAL(m - mapstep);
+        if (!m[-mapstep+1]) CANNY_PUSH_SERIAL(m - mapstep + 1);
+        if (!m[mapstep-1])  CANNY_PUSH_SERIAL(m + mapstep - 1);
+        if (!m[mapstep])    CANNY_PUSH_SERIAL(m + mapstep);
+        if (!m[mapstep+1])  CANNY_PUSH_SERIAL(m + mapstep + 1);
+    }

    parallel_for_(Range(0, dst.rows), finalPass(map, dst, mapstep), dst.total()/(double)(1<<16));
 }
@ -955,6 +955,8 @@ void Canny( InputArray _dx, InputArray _dy, OutputArray _dst,
                double low_thresh, double high_thresh,
                bool L2gradient )
 {
+    CV_INSTRUMENT_REGION()
+
    CV_Assert(_dx.dims() == 2);
    CV_Assert(_dx.type() == CV_16SC1 || _dx.type() == CV_16SC3);
    CV_Assert(_dy.type() == _dx.type());
@ -975,7 +977,7 @@ void Canny( InputArray _dx, InputArray _dy, OutputArray _dst,
    Mat dx = _dx.getMat();
    Mat dy = _dy.getMat();

-    CV_IPP_RUN(USE_IPP_CANNY && (!L2gradient && 1 == cn), ippCanny<true>(Mat(), dx, dy, dst, (float)low_thresh, (float)high_thresh))
+    CV_IPP_RUN_FAST(ipp_Canny(Mat(), dx, dy, dst, (float)low_thresh, (float)high_thresh, L2gradient, 0))

    if (cn > 1)
    {
--- a/modules/imgproc/src/corner.cpp
+++ b/modules/imgproc/src/corner.cpp
@ -604,9 +604,9 @@ namespace cv
 {
 static bool ipp_cornerMinEigenVal( InputArray _src, OutputArray _dst, int blockSize, int ksize, int borderType )
 {
+#if IPP_VERSION_X100 >= 800
    CV_INSTRUMENT_REGION_IPP()

-#if IPP_VERSION_X100 >= 800
    Mat src = _src.getMat();
    _dst.create( src.size(), CV_32FC1 );
    Mat dst = _dst.getMat();
@ -703,15 +703,11 @@ void cv::cornerMinEigenVal( InputArray _src, OutputArray _dst, int blockSize, in
 #if defined(HAVE_IPP)
 namespace cv
 {
-static bool ipp_cornerHarris( InputArray _src, OutputArray _dst, int blockSize, int ksize, double k, int borderType )
+static bool ipp_cornerHarris( Mat &src, Mat &dst, int blockSize, int ksize, double k, int borderType )
 {
+#if IPP_VERSION_X100 >= 810
    CV_INSTRUMENT_REGION_IPP()

-#if IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK
-    Mat src = _src.getMat();
-    _dst.create( src.size(), CV_32FC1 );
-    Mat dst = _dst.getMat();
-
    {
        int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
        int borderTypeNI = borderType & ~BORDER_ISOLATED;
@ -734,17 +730,17 @@ static bool ipp_cornerHarris( InputArray _src, OutputArray _dst, int blockSize,

            if (ippiHarrisCornerGetBufferSize(roisize, masksize, blockSize, datatype, cn, &bufsize) >= 0)
            {
-                Ipp8u * buffer = ippsMalloc_8u(bufsize);
+                Ipp8u * buffer = (Ipp8u*)CV_IPP_MALLOC(bufsize);
                IppiDifferentialKernel filterType = ksize > 0 ? ippFilterSobel : ippFilterScharr;
                IppiBorderType borderTypeIpp = borderTypeNI == BORDER_CONSTANT ? ippBorderConst : ippBorderRepl;
                IppStatus status = (IppStatus)-1;

                if (depth == CV_8U)
-                    status = CV_INSTRUMENT_FUN_IPP(ippiHarrisCorner_8u32f_C1R,((const Ipp8u *)src.data, (int)src.step, (Ipp32f *)dst.data, (int)dst.step, roisize,
-                        filterType, masksize, blockSize, (Ipp32f)k, (Ipp32f)scale, borderTypeIpp, 0, buffer));
+                    status = CV_INSTRUMENT_FUN_IPP(ippiHarrisCorner_8u32f_C1R, (const Ipp8u *)src.data, (int)src.step, (Ipp32f *)dst.data, (int)dst.step, roisize,
+                        filterType, masksize, blockSize, (Ipp32f)k, (Ipp32f)scale, borderTypeIpp, 0, buffer);
                else if (depth == CV_32F)
-                    status = CV_INSTRUMENT_FUN_IPP(ippiHarrisCorner_32f_C1R,((const Ipp32f *)src.data, (int)src.step, (Ipp32f *)dst.data, (int)dst.step, roisize,
-                        filterType, masksize, blockSize, (Ipp32f)k, (Ipp32f)scale, borderTypeIpp, 0, buffer));
+                    status = CV_INSTRUMENT_FUN_IPP(ippiHarrisCorner_32f_C1R, (const Ipp32f *)src.data, (int)src.step, (Ipp32f *)dst.data, (int)dst.step, roisize,
+                        filterType, masksize, blockSize, (Ipp32f)k, (Ipp32f)scale, borderTypeIpp, 0, buffer);
                ippsFree(buffer);

                if (status >= 0)
@ -756,7 +752,7 @@ static bool ipp_cornerHarris( InputArray _src, OutputArray _dst, int blockSize,
        }
    }
 #else
-    CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(blockSize);  CV_UNUSED(ksize); CV_UNUSED(k); CV_UNUSED(borderType);
+    CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(blockSize);  CV_UNUSED(ksize); CV_UNUSED(k); CV_UNUSED(borderType);
 #endif
    return false;
 }
@ -770,19 +766,17 @@ void cv::cornerHarris( InputArray _src, OutputArray _dst, int blockSize, int ksi
    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
               ocl_cornerMinEigenValVecs(_src, _dst, blockSize, ksize, k, borderType, HARRIS))

+    Mat src = _src.getMat();
+    _dst.create( src.size(), CV_32FC1 );
+    Mat dst = _dst.getMat();
+
 #ifdef HAVE_IPP
    int borderTypeNI = borderType & ~BORDER_ISOLATED;
    bool isolated = (borderType & BORDER_ISOLATED) != 0;
 #endif
    CV_IPP_RUN(((ksize == 3 || ksize == 5) && (_src.type() == CV_8UC1 || _src.type() == CV_32FC1) &&
        (borderTypeNI == BORDER_CONSTANT || borderTypeNI == BORDER_REPLICATE) && CV_MAT_CN(_src.type()) == 1 &&
-        (!_src.isSubmatrix() || isolated)) && IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK, ipp_cornerHarris( _src, _dst, blockSize, ksize, k, borderType ));
-
-
-    Mat src = _src.getMat();
-    _dst.create( src.size(), CV_32FC1 );
-    Mat dst = _dst.getMat();
-
+        (!_src.isSubmatrix() || isolated)) && IPP_VERSION_X100 >= 810, ipp_cornerHarris( src, dst, blockSize, ksize, k, borderType ));

    cornerEigenValsVecs( src, dst, blockSize, ksize, HARRIS, k, borderType );
 }
--- a/modules/imgproc/src/filter.cpp
+++ b/modules/imgproc/src/filter.cpp
@ -1360,14 +1360,14 @@ struct RowVec_32f
    {
        kernel = _kernel;
        haveSSE = checkHardwareSupport(CV_CPU_SSE);
-#if defined USE_IPP_SEP_FILTERS && IPP_DISABLE_BLOCK
+#if defined USE_IPP_SEP_FILTERS
        bufsz = -1;
 #endif
    }

    int operator()(const uchar* _src, uchar* _dst, int width, int cn) const
    {
-#if defined USE_IPP_SEP_FILTERS && IPP_DISABLE_BLOCK
+#if defined USE_IPP_SEP_FILTERS
        CV_IPP_CHECK()
        {
            int ret = ippiOperator(_src, _dst, width, cn);
@ -1408,7 +1408,7 @@ struct RowVec_32f

    Mat kernel;
    bool haveSSE;
-#if defined USE_IPP_SEP_FILTERS && IPP_DISABLE_BLOCK
+#if defined USE_IPP_SEP_FILTERS
 private:
    mutable int bufsz;
    int ippiOperator(const uchar* _src, uchar* _dst, int width, int cn) const
@ -1436,10 +1436,10 @@ private:
        float borderValue[] = {0.f, 0.f, 0.f};
        // here is the trick. IPP needs border type and extrapolates the row. We did it already.
        // So we pass anchor=0 and ignore the right tail of results since they are incorrect there.
-        if( (cn == 1 && CV_INSTRUMENT_FUN_IPP(ippiFilterRowBorderPipeline_32f_C1R,(src, step, &dst, roisz, _kx, _ksize, 0,
-                                                            ippBorderRepl, borderValue[0], bufptr)) < 0) ||
-            (cn == 3 && CV_INSTRUMENT_FUN_IPP(ippiFilterRowBorderPipeline_32f_C3R,(src, step, &dst, roisz, _kx, _ksize, 0,
-                                                            ippBorderRepl, borderValue, bufptr)) < 0))
+        if( (cn == 1 && CV_INSTRUMENT_FUN_IPP(ippiFilterRowBorderPipeline_32f_C1R, src, step, &dst, roisz, _kx, _ksize, 0,
+                                                            ippBorderRepl, borderValue[0], bufptr) < 0) ||
+            (cn == 3 && CV_INSTRUMENT_FUN_IPP(ippiFilterRowBorderPipeline_32f_C3R, src, step, &dst, roisz, _kx, _ksize, 0,
+                                                            ippBorderRepl, borderValue, bufptr) < 0))
        {
            setIppErrorStatus();
            return 0;
--- a/modules/imgproc/src/hough.cpp
+++ b/modules/imgproc/src/hough.cpp
@ -96,7 +96,7 @@ HoughLinesStandard( const Mat& img, float rho, float theta,
    int numangle = cvRound((max_theta - min_theta) / theta);
    int numrho = cvRound(((width + height) * 2 + 1) / rho);

-#if defined HAVE_IPP && IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK
+#if defined HAVE_IPP && IPP_VERSION_X100 >= 810 && !IPP_DISABLE_HOUGH
    CV_IPP_CHECK()
    {
        IppiSize srcSize = { width, height };
@ -108,8 +108,8 @@ HoughLinesStandard( const Mat& img, float rho, float theta,
        int linesCount = 0;
        lines.resize(ipp_linesMax);
        IppStatus ok = ippiHoughLineGetSize_8u_C1R(srcSize, delta, ipp_linesMax, &bufferSize);
-        Ipp8u* buffer = ippsMalloc_8u(bufferSize);
-        if (ok >= 0) {ok = CV_INSTRUMENT_FUN_IPP(ippiHoughLine_Region_8u32f_C1R,(image, step, srcSize, (IppPointPolar*) &lines[0], dstRoi, ipp_linesMax, &linesCount, delta, threshold, buffer))};
+        Ipp8u* buffer = ippsMalloc_8u_L(bufferSize);
+        if (ok >= 0) {ok = CV_INSTRUMENT_FUN_IPP(ippiHoughLine_Region_8u32f_C1R, image, step, srcSize, (IppPointPolar*) &lines[0], dstRoi, ipp_linesMax, &linesCount, delta, threshold, buffer);};
        ippsFree(buffer);
        if (ok >= 0)
        {
@ -429,7 +429,7 @@ HoughLinesProbabilistic( Mat& image,
    int numangle = cvRound(CV_PI / theta);
    int numrho = cvRound(((width + height) * 2 + 1) / rho);

-#if defined HAVE_IPP && IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK
+#if defined HAVE_IPP && IPP_VERSION_X100 >= 810 && !IPP_DISABLE_HOUGH
    CV_IPP_CHECK()
    {
        IppiSize srcSize = { width, height };
@ -440,12 +440,12 @@ HoughLinesProbabilistic( Mat& image,
        int linesCount = 0;
        lines.resize(ipp_linesMax);
        IppStatus ok = ippiHoughProbLineGetSize_8u_C1R(srcSize, delta, &specSize, &bufferSize);
-        Ipp8u* buffer = ippsMalloc_8u(bufferSize);
-        pSpec = (IppiHoughProbSpec*) malloc(specSize);
+        Ipp8u* buffer = ippsMalloc_8u_L(bufferSize);
+        pSpec = (IppiHoughProbSpec*) ippsMalloc_8u_L(specSize);
        if (ok >= 0) ok = ippiHoughProbLineInit_8u32f_C1R(srcSize, delta, ippAlgHintNone, pSpec);
-        if (ok >= 0) {ok = CV_INSTRUMENT_FUN_IPP(ippiHoughProbLine_8u32f_C1R,(image.data, image.step, srcSize, threshold, lineLength, lineGap, (IppiPoint*) &lines[0], ipp_linesMax, &linesCount, buffer, pSpec))};
+        if (ok >= 0) {ok = CV_INSTRUMENT_FUN_IPP(ippiHoughProbLine_8u32f_C1R, image.data, (int)image.step, srcSize, threshold, lineLength, lineGap, (IppiPoint*) &lines[0], ipp_linesMax, &linesCount, buffer, pSpec);};

-        free(pSpec);
+        ippsFree(pSpec);
        ippsFree(buffer);
        if (ok >= 0)
        {
--- a/modules/imgproc/src/moments.cpp
+++ b/modules/imgproc/src/moments.cpp
@ -556,13 +556,94 @@ static bool ocl_moments( InputArray _src, Moments& m, bool binary)
        m.m03 += mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
    }

+    completeMomentState( &m );
+
    return true;
 }

 #endif

+#ifdef HAVE_IPP
+typedef IppStatus (CV_STDCALL * ippiMoments)(const void* pSrc, int srcStep, IppiSize roiSize, IppiMomentState_64f* pCtx);
+
+static bool ipp_moments(Mat &src, Moments &m )
+{
+#if IPP_VERSION_X100 >= 900
+    CV_INSTRUMENT_REGION_IPP()
+
+    IppiSize  roi      = { src.cols, src.rows };
+    IppiPoint point    = { 0, 0 };
+    int       type     = src.type();
+    IppStatus ippStatus;
+
+    IppAutoBuffer<IppiMomentState_64f> state;
+    int stateSize = 0;
+
+    ippiMoments ippiMoments64f =
+        (type == CV_8UC1)?(ippiMoments)ippiMoments64f_8u_C1R:
+        (type == CV_16UC1)?(ippiMoments)ippiMoments64f_16u_C1R:
+        (type == CV_32FC1)?(ippiMoments)ippiMoments64f_32f_C1R:
+        NULL;
+    if(!ippiMoments64f)
+        return false;
+
+    ippStatus = ippiMomentGetStateSize_64f(ippAlgHintAccurate, &stateSize);
+    if(ippStatus < 0)
+        return false;
+
+    if(!state.allocate(stateSize) && stateSize)
+        return false;
+
+    ippStatus = ippiMomentInit_64f(state, ippAlgHintAccurate);
+    if(ippStatus < 0)
+        return false;
+
+    ippStatus = CV_INSTRUMENT_FUN_IPP(ippiMoments64f, src.ptr<Ipp8u>(), (int)src.step, roi, state);
+    if(ippStatus < 0)
+        return false;
+
+    ippStatus = ippiGetSpatialMoment_64f(state, 0, 0, 0, point, &m.m00);
+    if(ippStatus < 0)
+        return false;
+    ippiGetSpatialMoment_64f(state, 1, 0, 0, point, &m.m10);
+    ippiGetSpatialMoment_64f(state, 0, 1, 0, point, &m.m01);
+    ippiGetSpatialMoment_64f(state, 2, 0, 0, point, &m.m20);
+    ippiGetSpatialMoment_64f(state, 1, 1, 0, point, &m.m11);
+    ippiGetSpatialMoment_64f(state, 0, 2, 0, point, &m.m02);
+    ippiGetSpatialMoment_64f(state, 3, 0, 0, point, &m.m30);
+    ippiGetSpatialMoment_64f(state, 2, 1, 0, point, &m.m21);
+    ippiGetSpatialMoment_64f(state, 1, 2, 0, point, &m.m12);
+    ippiGetSpatialMoment_64f(state, 0, 3, 0, point, &m.m03);
+
+    ippStatus = ippiGetCentralMoment_64f(state, 2, 0, 0, &m.mu20);
+    if(ippStatus < 0)
+        return false;
+    ippiGetCentralMoment_64f(state, 1, 1, 0, &m.mu11);
+    ippiGetCentralMoment_64f(state, 0, 2, 0, &m.mu02);
+    ippiGetCentralMoment_64f(state, 3, 0, 0, &m.mu30);
+    ippiGetCentralMoment_64f(state, 2, 1, 0, &m.mu21);
+    ippiGetCentralMoment_64f(state, 1, 2, 0, &m.mu12);
+    ippiGetCentralMoment_64f(state, 0, 3, 0, &m.mu03);
+
+    ippStatus = ippiGetNormalizedCentralMoment_64f(state, 2, 0, 0, &m.nu20);
+    if(ippStatus < 0)
+        return false;
+    ippiGetNormalizedCentralMoment_64f(state, 1, 1, 0, &m.nu11);
+    ippiGetNormalizedCentralMoment_64f(state, 0, 2, 0, &m.nu02);
+    ippiGetNormalizedCentralMoment_64f(state, 3, 0, 0, &m.nu30);
+    ippiGetNormalizedCentralMoment_64f(state, 2, 1, 0, &m.nu21);
+    ippiGetNormalizedCentralMoment_64f(state, 1, 2, 0, &m.nu12);
+    ippiGetNormalizedCentralMoment_64f(state, 0, 3, 0, &m.nu03);
+
+    return true;
+#else
+    CV_UNUSED(src); CV_UNUSED(m);
+    return false;
+#endif
 }
+#endif

+}

 cv::Moments cv::moments( InputArray _src, bool binary )
 {
@ -579,159 +660,93 @@ cv::Moments cv::moments( InputArray _src, bool binary )
        return m;

 #ifdef HAVE_OPENCL
-    if( !(ocl::useOpenCL() && type == CV_8UC1  &&
-        _src.isUMat() && ocl_moments(_src, m, binary)) )
+    CV_OCL_RUN_(type == CV_8UC1 && _src.isUMat(), ocl_moments(_src, m, binary), m);
 #endif
-    {
-        Mat mat = _src.getMat();
-        if( mat.checkVector(2) >= 0 && (depth == CV_32F || depth == CV_32S))
-            return contourMoments(mat);
-
-        if( cn > 1 )
-            CV_Error( CV_StsBadArg, "Invalid image type (must be single-channel)" );

-#if IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK
-        CV_IPP_CHECK()
-        {
-            if (!binary)
-            {
-                IppiSize roi = { mat.cols, mat.rows };
-                IppiMomentState_64f * moment = NULL;
-                // ippiMomentInitAlloc_64f, ippiMomentFree_64f are deprecated in 8.1, but there are not another way
-                // to initialize IppiMomentState_64f. When GetStateSize and Init functions will appear we have to
-                // change our code.
-                CV_SUPPRESS_DEPRECATED_START
-                if (ippiMomentInitAlloc_64f(&moment, ippAlgHintAccurate) >= 0)
-                {
-                    typedef IppStatus (CV_STDCALL * ippiMoments)(const void * pSrc, int srcStep, IppiSize roiSize, IppiMomentState_64f* pCtx);
-                    ippiMoments ippFunc =
-                        type == CV_8UC1 ? (ippiMoments)ippiMoments64f_8u_C1R :
-                        type == CV_16UC1 ? (ippiMoments)ippiMoments64f_16u_C1R :
-                        type == CV_32FC1? (ippiMoments)ippiMoments64f_32f_C1R : 0;
-
-                    if (ippFunc)
-                    {
-                        if (CV_INSTRUMENT_FUN_IPP(ippFunc,(mat.data, (int)mat.step, roi, moment)) >= 0)
-                        {
-                            IppiPoint point = { 0, 0 };
-                            ippiGetSpatialMoment_64f(moment, 0, 0, 0, point, &m.m00);
-                            ippiGetSpatialMoment_64f(moment, 1, 0, 0, point, &m.m10);
-                            ippiGetSpatialMoment_64f(moment, 0, 1, 0, point, &m.m01);
-
-                            ippiGetSpatialMoment_64f(moment, 2, 0, 0, point, &m.m20);
-                            ippiGetSpatialMoment_64f(moment, 1, 1, 0, point, &m.m11);
-                            ippiGetSpatialMoment_64f(moment, 0, 2, 0, point, &m.m02);
-
-                            ippiGetSpatialMoment_64f(moment, 3, 0, 0, point, &m.m30);
-                            ippiGetSpatialMoment_64f(moment, 2, 1, 0, point, &m.m21);
-                            ippiGetSpatialMoment_64f(moment, 1, 2, 0, point, &m.m12);
-                            ippiGetSpatialMoment_64f(moment, 0, 3, 0, point, &m.m03);
-                            ippiGetCentralMoment_64f(moment, 2, 0, 0, &m.mu20);
-                            ippiGetCentralMoment_64f(moment, 1, 1, 0, &m.mu11);
-                            ippiGetCentralMoment_64f(moment, 0, 2, 0, &m.mu02);
-                            ippiGetCentralMoment_64f(moment, 3, 0, 0, &m.mu30);
-                            ippiGetCentralMoment_64f(moment, 2, 1, 0, &m.mu21);
-                            ippiGetCentralMoment_64f(moment, 1, 2, 0, &m.mu12);
-                            ippiGetCentralMoment_64f(moment, 0, 3, 0, &m.mu03);
-                            ippiGetNormalizedCentralMoment_64f(moment, 2, 0, 0, &m.nu20);
-                            ippiGetNormalizedCentralMoment_64f(moment, 1, 1, 0, &m.nu11);
-                            ippiGetNormalizedCentralMoment_64f(moment, 0, 2, 0, &m.nu02);
-                            ippiGetNormalizedCentralMoment_64f(moment, 3, 0, 0, &m.nu30);
-                            ippiGetNormalizedCentralMoment_64f(moment, 2, 1, 0, &m.nu21);
-                            ippiGetNormalizedCentralMoment_64f(moment, 1, 2, 0, &m.nu12);
-                            ippiGetNormalizedCentralMoment_64f(moment, 0, 3, 0, &m.nu03);
-
-                            ippiMomentFree_64f(moment);
-                            CV_IMPL_ADD(CV_IMPL_IPP);
-                            return m;
-                        }
-                        setIppErrorStatus();
-                    }
-                    ippiMomentFree_64f(moment);
-                }
-                else
-                    setIppErrorStatus();
-                CV_SUPPRESS_DEPRECATED_END
-            }
-        }
-#endif
+    Mat mat = _src.getMat();
+    if( mat.checkVector(2) >= 0 && (depth == CV_32F || depth == CV_32S))
+        return contourMoments(mat);
+
+    if( cn > 1 )
+        CV_Error( CV_StsBadArg, "Invalid image type (must be single-channel)" );
+
+    CV_IPP_RUN(!binary, ipp_moments(mat, m), m);
+
+    if( binary || depth == CV_8U )
+        func = momentsInTile<uchar, int, int>;
+    else if( depth == CV_16U )
+        func = momentsInTile<ushort, int, int64>;
+    else if( depth == CV_16S )
+        func = momentsInTile<short, int, int64>;
+    else if( depth == CV_32F )
+        func = momentsInTile<float, double, double>;
+    else if( depth == CV_64F )
+        func = momentsInTile<double, double, double>;
+    else
+        CV_Error( CV_StsUnsupportedFormat, "" );

-        if( binary || depth == CV_8U )
-            func = momentsInTile<uchar, int, int>;
-        else if( depth == CV_16U )
-            func = momentsInTile<ushort, int, int64>;
-        else if( depth == CV_16S )
-            func = momentsInTile<short, int, int64>;
-        else if( depth == CV_32F )
-            func = momentsInTile<float, double, double>;
-        else if( depth == CV_64F )
-            func = momentsInTile<double, double, double>;
-        else
-            CV_Error( CV_StsUnsupportedFormat, "" );
+    Mat src0(mat);

-        Mat src0(mat);
+    for( int y = 0; y < size.height; y += TILE_SIZE )
+    {
+        Size tileSize;
+        tileSize.height = std::min(TILE_SIZE, size.height - y);

-        for( int y = 0; y < size.height; y += TILE_SIZE )
+        for( int x = 0; x < size.width; x += TILE_SIZE )
        {
-            Size tileSize;
-            tileSize.height = std::min(TILE_SIZE, size.height - y);
+            tileSize.width = std::min(TILE_SIZE, size.width - x);
+            Mat src(src0, cv::Rect(x, y, tileSize.width, tileSize.height));

-            for( int x = 0; x < size.width; x += TILE_SIZE )
+            if( binary )
            {
-                tileSize.width = std::min(TILE_SIZE, size.width - x);
-                Mat src(src0, cv::Rect(x, y, tileSize.width, tileSize.height));
-
-                if( binary )
-                {
-                    cv::Mat tmp(tileSize, CV_8U, nzbuf);
-                    cv::compare( src, 0, tmp, CV_CMP_NE );
-                    src = tmp;
-                }
+                cv::Mat tmp(tileSize, CV_8U, nzbuf);
+                cv::compare( src, 0, tmp, CV_CMP_NE );
+                src = tmp;
+            }

-                double mom[10];
-                func( src, mom );
+            double mom[10];
+            func( src, mom );

-                if(binary)
-                {
-                    double s = 1./255;
-                    for( int k = 0; k < 10; k++ )
-                        mom[k] *= s;
-                }
+            if(binary)
+            {
+                double s = 1./255;
+                for( int k = 0; k < 10; k++ )
+                    mom[k] *= s;
+            }

-                double xm = x * mom[0], ym = y * mom[0];
+            double xm = x * mom[0], ym = y * mom[0];

-                // accumulate moments computed in each tile
+            // accumulate moments computed in each tile

-                // + m00 ( = m00' )
-                m.m00 += mom[0];
+            // + m00 ( = m00' )
+            m.m00 += mom[0];

-                // + m10 ( = m10' + x*m00' )
-                m.m10 += mom[1] + xm;
+            // + m10 ( = m10' + x*m00' )
+            m.m10 += mom[1] + xm;

-                // + m01 ( = m01' + y*m00' )
-                m.m01 += mom[2] + ym;
+            // + m01 ( = m01' + y*m00' )
+            m.m01 += mom[2] + ym;

-                // + m20 ( = m20' + 2*x*m10' + x*x*m00' )
-                m.m20 += mom[3] + x * (mom[1] * 2 + xm);
+            // + m20 ( = m20' + 2*x*m10' + x*x*m00' )
+            m.m20 += mom[3] + x * (mom[1] * 2 + xm);

-                // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
-                m.m11 += mom[4] + x * (mom[2] + ym) + y * mom[1];
+            // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
+            m.m11 += mom[4] + x * (mom[2] + ym) + y * mom[1];

-                // + m02 ( = m02' + 2*y*m01' + y*y*m00' )
-                m.m02 += mom[5] + y * (mom[2] * 2 + ym);
+            // + m02 ( = m02' + 2*y*m01' + y*y*m00' )
+            m.m02 += mom[5] + y * (mom[2] * 2 + ym);

-                // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
-                m.m30 += mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
+            // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
+            m.m30 += mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));

-                // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
-                m.m21 += mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
+            // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
+            m.m21 += mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];

-                // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
-                m.m12 += mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
+            // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
+            m.m12 += mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];

-                // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
-                m.m03 += mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
-            }
+            // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
+            m.m03 += mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
        }
    }

--- a/modules/imgproc/src/pyramids.cpp
+++ b/modules/imgproc/src/pyramids.cpp
@ -1200,7 +1200,7 @@ static bool ipp_pyrdown( InputArray _src, OutputArray _dst, const Size& _dsz, in
 {
    CV_INSTRUMENT_REGION_IPP()

-#if IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK
+#if IPP_VERSION_X100 >= 810 && !IPP_DISABLE_PYRAMIDS_DOWN
    Size dsz = _dsz.area() == 0 ? Size((_src.cols() + 1)/2, (_src.rows() + 1)/2) : _dsz;
    bool isolated = (borderType & BORDER_ISOLATED) != 0;
    int borderTypeNI = borderType & ~BORDER_ISOLATED;
@ -1235,7 +1235,7 @@ static bool ipp_pyrdown( InputArray _src, OutputArray _dst, const Size& _dsz, in
                CV_SUPPRESS_DEPRECATED_END
                if (ok >= 0)
                {
-                    Ipp8u* buffer = ippsMalloc_8u(bufferSize);
+                    Ipp8u* buffer = ippsMalloc_8u_L(bufferSize);
                    ok = pyrUpFunc(src.data, (int) src.step, dst.data, (int) dst.step, srcRoi, buffer);
                    ippsFree(buffer);

@ -1388,7 +1388,7 @@ static bool ipp_pyrup( InputArray _src, OutputArray _dst, const Size& _dsz, int
 {
    CV_INSTRUMENT_REGION_IPP()

-#if IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK
+#if IPP_VERSION_X100 >= 810 && !IPP_DISABLE_PYRAMIDS_UP
    Size sz = _src.dims() <= 2 ? _src.size() : Size();
    Size dsz = _dsz.area() == 0 ? Size(_src.cols()*2, _src.rows()*2) : _dsz;

@ -1421,7 +1421,7 @@ static bool ipp_pyrup( InputArray _src, OutputArray _dst, const Size& _dsz, int
                CV_SUPPRESS_DEPRECATED_END
                if (ok >= 0)
                {
-                    Ipp8u* buffer = ippsMalloc_8u(bufferSize);
+                    Ipp8u* buffer = ippsMalloc_8u_L(bufferSize);
                    ok = pyrUpFunc(src.data, (int) src.step, dst.data, (int) dst.step, srcRoi, buffer);
                    ippsFree(buffer);

@ -1496,7 +1496,7 @@ static bool ipp_buildpyramid( InputArray _src, OutputArrayOfArrays _dst, int max
 {
    CV_INSTRUMENT_REGION_IPP()

-#if IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK
+#if IPP_VERSION_X100 >= 810 && !IPP_DISABLE_PYRAMIDS_BUILD
    Mat src = _src.getMat();
    _dst.create( maxlevel + 1, 1, 0 );
    _dst.getMatRef(0) = src;
@ -1626,7 +1626,7 @@ void cv::buildPyramid( InputArray _src, OutputArrayOfArrays _dst, int maxlevel,

    int i=1;

-    CV_IPP_RUN(((IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK) && ((borderType & ~BORDER_ISOLATED) == BORDER_DEFAULT && (!_src.isSubmatrix() || ((borderType & BORDER_ISOLATED) != 0)))),
+    CV_IPP_RUN(((IPP_VERSION_X100 >= 810) && ((borderType & ~BORDER_ISOLATED) == BORDER_DEFAULT && (!_src.isSubmatrix() || ((borderType & BORDER_ISOLATED) != 0)))),
        ipp_buildpyramid( _src,  _dst,  maxlevel,  borderType));

    for( ; i <= maxlevel; i++ )
--- a/modules/imgproc/src/smooth.cpp
+++ b/modules/imgproc/src/smooth.cpp
@ -1734,98 +1734,84 @@ namespace cv
 }
 #endif

-// TODO: IPP performance regression
-#if defined(HAVE_IPP) && IPP_DISABLE_BLOCK
+#if defined(HAVE_IPP)
 namespace cv
 {
-static bool ipp_boxfilter( InputArray _src, OutputArray _dst, int ddepth,
-                Size ksize, Point anchor,
-                bool normalize, int borderType )
+static bool ipp_boxfilter(Mat &src, Mat &dst, Size ksize, Point anchor, bool normalize, int borderType)
 {
    CV_INSTRUMENT_REGION_IPP()

-    int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype);
-    if( ddepth < 0 )
-        ddepth = sdepth;
-    int ippBorderType = borderType & ~BORDER_ISOLATED;
+    // Problem with SSE42 optimization for 16s
+#if IPP_DISABLE_PERF_BOX16S_SSE42
+    if(src.depth() == CV_16S && !(ipp::getIppFeatures()&ippCPUID_AVX))
+        return false;
+#endif
+
+    int stype = src.type(), cn = CV_MAT_CN(stype);
+    IppiBorderType ippBorderType = ippiGetBorderType(borderType & ~BORDER_ISOLATED);
+    IppDataType ippType = ippiGetDataType(stype);
    Point ocvAnchor, ippAnchor;
    ocvAnchor.x = anchor.x < 0 ? ksize.width / 2 : anchor.x;
    ocvAnchor.y = anchor.y < 0 ? ksize.height / 2 : anchor.y;
    ippAnchor.x = ksize.width / 2 - (ksize.width % 2 == 0 ? 1 : 0);
    ippAnchor.y = ksize.height / 2 - (ksize.height % 2 == 0 ? 1 : 0);

-    Mat src = _src.getMat();
-    _dst.create( src.size(), CV_MAKETYPE(ddepth, cn) );
-    Mat dst = _dst.getMat();
-    if( borderType != BORDER_CONSTANT && normalize && (borderType & BORDER_ISOLATED) != 0 )
+    if(normalize && (!src.isSubmatrix() || borderType&BORDER_ISOLATED) && stype == dst.type() &&
+        (ippBorderType == ippBorderRepl || /* returns ippStsStepErr: Step value is not valid */
+            ippBorderType == ippBorderConst ||
+            ippBorderType == ippBorderMirror) && ocvAnchor == ippAnchor) // returns ippStsMaskSizeErr: mask has an illegal value
    {
-        if( src.rows == 1 )
-            ksize.height = 1;
-        if( src.cols == 1 )
-            ksize.width = 1;
-    }
+        IppStatus status;
+        Ipp32s bufSize = 0;
+        IppiSize roiSize = { dst.cols, dst.rows };
+        IppiSize maskSize = { ksize.width, ksize.height };
+        IppAutoBuffer<Ipp8u> buffer;

-    {
-        if (normalize && !src.isSubmatrix() && ddepth == sdepth &&
-            (/*ippBorderType == BORDER_REPLICATE ||*/ /* returns ippStsStepErr: Step value is not valid */
-             ippBorderType == BORDER_CONSTANT) && ocvAnchor == ippAnchor &&
-             dst.cols != ksize.width && dst.rows != ksize.height) // returns ippStsMaskSizeErr: mask has an illegal value
-        {
-            Ipp32s bufSize = 0;
-            IppiSize roiSize = { dst.cols, dst.rows }, maskSize = { ksize.width, ksize.height };
-
-#define IPP_FILTER_BOX_BORDER(ippType, ippDataType, flavor) \
-            do \
-            { \
-                if (ippiFilterBoxBorderGetBufferSize(roiSize, maskSize, ippDataType, cn, &bufSize) >= 0) \
-                { \
-                    Ipp8u * buffer = ippsMalloc_8u(bufSize); \
-                    ippType borderValue[4] = { 0, 0, 0, 0 }; \
-                    ippBorderType = ippBorderType == BORDER_CONSTANT ? ippBorderConst : ippBorderRepl; \
-                    IppStatus status = CV_INSTRUMENT_FUN_IPP(ippiFilterBoxBorder_##flavor, src.ptr<ippType>(), (int)src.step, dst.ptr<ippType>(), \
-                                                                    (int)dst.step, roiSize, maskSize, \
-                                                                    (IppiBorderType)ippBorderType, borderValue, buffer); \
-                    ippsFree(buffer); \
-                    if (status >= 0) \
-                    { \
-                        CV_IMPL_ADD(CV_IMPL_IPP); \
-                        return true; \
-                    } \
-                } \
-            } while ((void)0, 0)
-
-            if (stype == CV_8UC1)
-                IPP_FILTER_BOX_BORDER(Ipp8u, ipp8u, 8u_C1R);
-            else if (stype == CV_8UC3)
-                IPP_FILTER_BOX_BORDER(Ipp8u, ipp8u, 8u_C3R);
-            else if (stype == CV_8UC4)
-                IPP_FILTER_BOX_BORDER(Ipp8u, ipp8u, 8u_C4R);
-
-            // Oct 2014: performance with BORDER_CONSTANT
-            //else if (stype == CV_16UC1)
-            //    IPP_FILTER_BOX_BORDER(Ipp16u, ipp16u, 16u_C1R);
-            else if (stype == CV_16UC3)
-                IPP_FILTER_BOX_BORDER(Ipp16u, ipp16u, 16u_C3R);
-            else if (stype == CV_16UC4)
-                IPP_FILTER_BOX_BORDER(Ipp16u, ipp16u, 16u_C4R);
-
-            // Oct 2014: performance with BORDER_CONSTANT
-            //else if (stype == CV_16SC1)
-            //    IPP_FILTER_BOX_BORDER(Ipp16s, ipp16s, 16s_C1R);
-            else if (stype == CV_16SC3)
-                IPP_FILTER_BOX_BORDER(Ipp16s, ipp16s, 16s_C3R);
-            else if (stype == CV_16SC4)
-                IPP_FILTER_BOX_BORDER(Ipp16s, ipp16s, 16s_C4R);
-
-            else if (stype == CV_32FC1)
-                IPP_FILTER_BOX_BORDER(Ipp32f, ipp32f, 32f_C1R);
-            else if (stype == CV_32FC3)
-                IPP_FILTER_BOX_BORDER(Ipp32f, ipp32f, 32f_C3R);
-            else if (stype == CV_32FC4)
-                IPP_FILTER_BOX_BORDER(Ipp32f, ipp32f, 32f_C4R);
+        if(ippiFilterBoxBorderGetBufferSize(roiSize, maskSize, ippType, cn, &bufSize) < 0)
+            return false;
+
+        buffer.allocate(bufSize);
+
+        #define IPP_FILTER_BOX_BORDER(ippType, flavor)\
+        {\
+            ippType borderValue[4] = { 0, 0, 0, 0 };\
+            status = CV_INSTRUMENT_FUN_IPP(ippiFilterBoxBorder_##flavor, src.ptr<ippType>(), (int)src.step, dst.ptr<ippType>(),\
+                                            (int)dst.step, roiSize, maskSize,\
+                                            ippBorderType, borderValue, buffer);\
        }
-#undef IPP_FILTER_BOX_BORDER
+
+        if (stype == CV_8UC1)
+            IPP_FILTER_BOX_BORDER(Ipp8u, 8u_C1R)
+        else if (stype == CV_8UC3)
+            IPP_FILTER_BOX_BORDER(Ipp8u, 8u_C3R)
+        else if (stype == CV_8UC4)
+            IPP_FILTER_BOX_BORDER(Ipp8u, 8u_C4R)
+        else if (stype == CV_16UC1)
+            IPP_FILTER_BOX_BORDER(Ipp16u, 16u_C1R)
+        else if (stype == CV_16UC3)
+            IPP_FILTER_BOX_BORDER(Ipp16u, 16u_C3R)
+        else if (stype == CV_16UC4)
+            IPP_FILTER_BOX_BORDER(Ipp16u, 16u_C4R)
+        else if (stype == CV_16SC1)
+            IPP_FILTER_BOX_BORDER(Ipp16s, 16s_C1R)
+        else if (stype == CV_16SC3)
+            IPP_FILTER_BOX_BORDER(Ipp16s, 16s_C3R)
+        else if (stype == CV_16SC4)
+            IPP_FILTER_BOX_BORDER(Ipp16s, 16s_C4R)
+        else if (stype == CV_32FC1)
+            IPP_FILTER_BOX_BORDER(Ipp32f, 32f_C1R)
+        else if (stype == CV_32FC3)
+            IPP_FILTER_BOX_BORDER(Ipp32f, 32f_C3R)
+        else if (stype == CV_32FC4)
+            IPP_FILTER_BOX_BORDER(Ipp32f, 32f_C4R)
+        else
+            return false;
+
+        if(status >= 0)
+            return true;
    }
+#undef IPP_FILTER_BOX_BORDER
+
    return false;
 }
 }
@ -1866,19 +1852,7 @@ void cv::boxFilter( InputArray _src, OutputArray _dst, int ddepth,
        return;
 #endif

-#if defined HAVE_IPP && IPP_DISABLE_BLOCK
-    int ippBorderType = borderType & ~BORDER_ISOLATED;
-    Point ocvAnchor, ippAnchor;
-    ocvAnchor.x = anchor.x < 0 ? ksize.width / 2 : anchor.x;
-    ocvAnchor.y = anchor.y < 0 ? ksize.height / 2 : anchor.y;
-    ippAnchor.x = ksize.width / 2 - (ksize.width % 2 == 0 ? 1 : 0);
-    ippAnchor.y = ksize.height / 2 - (ksize.height % 2 == 0 ? 1 : 0);
-    CV_IPP_RUN((normalize && !_src.isSubmatrix() && ddepth == sdepth &&
-            (/*ippBorderType == BORDER_REPLICATE ||*/ /* returns ippStsStepErr: Step value is not valid */
-             ippBorderType == BORDER_CONSTANT) && ocvAnchor == ippAnchor &&
-             _dst.cols() != ksize.width && _dst.rows() != ksize.height),
-             ipp_boxfilter( _src,  _dst,  ddepth, ksize,  anchor, normalize,  borderType));
-#endif
+    CV_IPP_RUN_FAST(ipp_boxfilter(src, dst, ksize, anchor, normalize, borderType));

    Point ofs;
    Size wsz(src.cols, src.rows);
@ -3691,53 +3665,6 @@ private:
    float *space_weight, *color_weight;
 };

-#if defined (HAVE_IPP) && IPP_DISABLE_BLOCK
-class IPPBilateralFilter_8u_Invoker :
-    public ParallelLoopBody
-{
-public:
-    IPPBilateralFilter_8u_Invoker(Mat &_src, Mat &_dst, double _sigma_color, double _sigma_space, int _radius, bool *_ok) :
-      ParallelLoopBody(), src(_src), dst(_dst), sigma_color(_sigma_color), sigma_space(_sigma_space), radius(_radius), ok(_ok)
-      {
-          *ok = true;
-      }
-
-      virtual void operator() (const Range& range) const
-      {
-          int d = radius * 2 + 1;
-          IppiSize kernel = {d, d};
-          IppiSize roi={dst.cols, range.end - range.start};
-          int bufsize=0;
-          if (0 > ippiFilterBilateralGetBufSize_8u_C1R( ippiFilterBilateralGauss, roi, kernel, &bufsize))
-          {
-              *ok = false;
-              return;
-          }
-          AutoBuffer<uchar> buf(bufsize);
-          IppiFilterBilateralSpec *pSpec = (IppiFilterBilateralSpec *)alignPtr(&buf[0], 32);
-          if (0 > ippiFilterBilateralInit_8u_C1R( ippiFilterBilateralGauss, kernel, (Ipp32f)sigma_color, (Ipp32f)sigma_space, 1, pSpec ))
-          {
-              *ok = false;
-              return;
-          }
-          if (0 > ippiFilterBilateral_8u_C1R( src.ptr<uchar>(range.start) + radius * ((int)src.step[0] + 1), (int)src.step[0], dst.ptr<uchar>(range.start), (int)dst.step[0], roi, kernel, pSpec ))
-              *ok = false;
-          else
-          {
-            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
-          }
-      }
-private:
-    Mat &src;
-    Mat &dst;
-    double sigma_color;
-    double sigma_space;
-    int radius;
-    bool *ok;
-    const IPPBilateralFilter_8u_Invoker& operator= (const IPPBilateralFilter_8u_Invoker&);
-};
-#endif
-
 #ifdef HAVE_OPENCL

 static bool ocl_bilateralFilter_8u(InputArray _src, OutputArray _dst, int d,
@ -3861,24 +3788,6 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d,
    Mat temp;
    copyMakeBorder( src, temp, radius, radius, radius, radius, borderType );

-#if defined HAVE_IPP && (IPP_VERSION_X100 >= 700) && IPP_DISABLE_BLOCK
-    CV_IPP_CHECK()
-    {
-        if( cn == 1 )
-        {
-            bool ok;
-            IPPBilateralFilter_8u_Invoker body(temp, dst, sigma_color * sigma_color, sigma_space * sigma_space, radius, &ok );
-            parallel_for_(Range(0, dst.rows), body, dst.total()/(double)(1<<16));
-            if( ok )
-            {
-                CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
-                return;
-            }
-            setIppErrorStatus();
-        }
-    }
-#endif
-
    std::vector<float> _color_weight(cn*256);
    std::vector<float> _space_weight(d*d);
    std::vector<int> _space_ofs(d*d);
@ -4293,6 +4202,107 @@ bilateralFilter_32f( const Mat& src, Mat& dst, int d,
    parallel_for_(Range(0, size.height), body, dst.total()/(double)(1<<16));
 }

+#ifdef HAVE_IPP
+#define IPP_BILATERAL_PARALLEL 1
+
+#ifdef HAVE_IPP_IW
+class ipp_bilateralFilterParallel: public ParallelLoopBody // loop body: each operator() call filters one stripe of rows of dst with iwiFilterBilateral
+{
+public:
+    ipp_bilateralFilterParallel(::ipp::IwiImage &_src, ::ipp::IwiImage &_dst, int _radius, Ipp32f _valSquareSigma, Ipp32f _posSquareSigma, ::ipp::IwiBorderType _borderType, bool *_ok):
+        src(_src), dst(_dst) // images are held by reference, not copied
+    {
+        pOk = _ok; // status flag supplied by the caller; must not be NULL (dereferenced below)
+
+        radius          = _radius;
+        valSquareSigma  = _valSquareSigma;
+        posSquareSigma  = _posSquareSigma;
+        borderType      = _borderType;
+
+        *pOk = true; // start optimistic; operator() clears the flag on failure
+    }
+    ~ipp_bilateralFilterParallel() {}
+
+    virtual void operator() (const Range& range) const // processes rows [range.start, range.end)
+    {
+        if(*pOk == false) // flag already cleared (e.g. by a failed stripe): skip the remaining work
+            return;
+
+        try
+        {
+            ::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, dst.m_size.width, range.end - range.start); // full-width stripe covering the rows in range
+            CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, &src, &dst, radius, valSquareSigma, posSquareSigma, ippiFilterBilateralGauss, ippDistNormL1, borderType, &roi);
+        }
+        catch(const ::ipp::IwException &) // catch by const reference: no copy of the exception object, no slicing
+        {
+            *pOk = false; // report the failure through the shared flag instead of propagating the exception
+            return;
+        }
+    }
+private:
+    ::ipp::IwiImage &src;
+    ::ipp::IwiImage &dst;
+
+    int                  radius;
+    Ipp32f               valSquareSigma;
+    Ipp32f               posSquareSigma;
+    ::ipp::IwiBorderType borderType;
+
+    bool  *pOk; // writable from the const operator(): only the pointer is const there, not the pointee
+    const ipp_bilateralFilterParallel& operator= (const ipp_bilateralFilterParallel&); // private, declared without a body here
+};
+#endif
+
+static bool ipp_bilateralFilter(Mat &src, Mat &dst, int d, double sigmaColor, double sigmaSpace, int borderType) // returns true only when the IPP IW path ran to completion without an exception
+{
+#ifdef HAVE_IPP_IW
+    CV_INSTRUMENT_REGION_IPP()
+
+    int         radius         = IPP_MAX(((d <= 0)?cvRound(sigmaSpace*1.5):d/2), 1); // d <= 0: derive the radius from sigmaSpace; otherwise d/2; never below 1
+    Ipp32f      valSquareSigma = (Ipp32f)((sigmaColor <= 0)?1:sigmaColor*sigmaColor); // non-positive sigma is replaced by 1, otherwise squared
+    Ipp32f      posSquareSigma = (Ipp32f)((sigmaSpace <= 0)?1:sigmaSpace*sigmaSpace); // same rule for the spatial sigma
+
+    // Acquire data and begin processing
+    try
+    {
+        ::ipp::IwiImage      iwSrc = ippiGetImage(src);
+        ::ipp::IwiImage      iwDst = ippiGetImage(dst);
+        ::ipp::IwiBorderSize borderSize(radius);
+        ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
+        if(!ippBorder.m_borderType) // zero border type: presumably an unsupported border mode (confirm against ippiGetBorder)
+            return false;
+
+        // IW 2017u2 has bug which doesn't allow use of partial inMem with tiling
+        if((((ippBorder.m_borderFlags)&ippBorderInMem) && ((ippBorder.m_borderFlags)&ippBorderInMem) != ippBorderInMem))
+            return false;
+
+        bool  ok      = true;
+        int   threads = ippiSuggestThreadsNum(iwDst, 2);
+        Range range(0, (int)iwDst.m_size.height); // all destination rows
+        ipp_bilateralFilterParallel invoker(iwSrc, iwDst, radius, valSquareSigma, posSquareSigma, ippBorder, &ok);
+        if(!ok) // the invoker constructor sets ok to true, so this guard is not expected to fire
+            return false;
+
+        if(IPP_BILATERAL_PARALLEL && threads > 1)
+            parallel_for_(range, invoker, threads*4); // threads*4 is passed as the stripe-count argument
+        else
+            invoker(range); // single call over the whole row range
+
+        if(!ok) // a stripe reported failure through the shared flag
+            return false;
+    }
+    catch (const ::ipp::IwException &) // catch by const reference: no copy of the exception object, no slicing
+    {
+        return false;
+    }
+    return true;
+#else
+    CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(d); CV_UNUSED(sigmaColor); CV_UNUSED(sigmaSpace); CV_UNUSED(borderType);
+    return false;
+#endif
+}
+#endif
+
 }

 void cv::bilateralFilter( InputArray _src, OutputArray _dst, int d,
@ -4308,6 +4318,8 @@ void cv::bilateralFilter( InputArray _src, OutputArray _dst, int d,

    Mat src = _src.getMat(), dst = _dst.getMat();

+    CV_IPP_RUN_FAST(ipp_bilateralFilter(src, dst, d, sigmaColor, sigmaSpace, borderType));
+
    if( src.depth() == CV_8U )
        bilateralFilter_8u( src, dst, d, sigmaColor, sigmaSpace, borderType );
    else if( src.depth() == CV_32F )
--- a/modules/imgproc/src/sumpixels.cpp
+++ b/modules/imgproc/src/sumpixels.cpp
@ -405,58 +405,43 @@ static bool ipp_integral(
    const uchar* src, size_t srcstep,
    uchar* sum, size_t sumstep,
    uchar* sqsum, size_t sqsumstep,
+    uchar* tilted, size_t tstep,
    int width, int height, int cn)
 {
    CV_INSTRUMENT_REGION_IPP()

-#if IPP_VERSION_X100 != 900 // Disabled on ICV due invalid results
-    if( sdepth <= 0 )
-        sdepth = depth == CV_8U ? CV_32S : CV_64F;
-    if ( sqdepth <= 0 )
-         sqdepth = CV_64F;
-    sdepth = CV_MAT_DEPTH(sdepth), sqdepth = CV_MAT_DEPTH(sqdepth);
+    IppiSize size = {width, height};

-    if( ( depth == CV_8U ) && ( sdepth == CV_32F || sdepth == CV_32S ) && ( !sqsum || sqdepth == CV_64F ) && ( cn == 1 ) )
+    if(cn > 1)
+        return false;
+    if(tilted)
    {
-        IppStatus status = ippStsErr;
-        IppiSize srcRoiSize = ippiSize( width, height );
-        if( sdepth == CV_32F )
-        {
-            if( sqsum )
-            {
-                status = CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32f64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, srcRoiSize, 0, 0);
-            }
-            else
-            {
-                status = CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, srcRoiSize, 0);
-            }
-        }
-        else if( sdepth == CV_32S )
-        {
-            if( sqsum )
-            {
-                status = CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, srcRoiSize, 0, 0);
-            }
-            else
-            {
-                status = CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, srcRoiSize, 0);
-            }
-        }
-        if (0 <= status)
-        {
-            CV_IMPL_ADD(CV_IMPL_IPP);
-            return true;
-        }
+        CV_UNUSED(tstep);
+        return false;
+    }
+
+    if(!sqsum)
+    {
+        if(depth == CV_8U && sdepth == CV_32S)
+            return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, size, 0) >= 0;
+        else if(depth == CV_8UC1 && sdepth == CV_32F)
+            return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size, 0) >= 0;
+        else if(depth == CV_32FC1 && sdepth == CV_32F)
+            return CV_INSTRUMENT_FUN_IPP(ippiIntegral_32f_C1R, (const Ipp32f*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size) >= 0;
+        else
+            return false;
+    }
+    else
+    {
+        if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_32S)
+            return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp32s*)sqsum, (int)sqsumstep, size, 0, 0) >= 0;
+        else if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F)
+            return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0;
+        else if(depth == CV_8U && sdepth == CV_32F && sqdepth == CV_64F)
+            return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32f64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0;
+        else
+            return false;
    }
-#else
-    CV_UNUSED(depth); CV_UNUSED(sdepth); CV_UNUSED(sqdepth);
-    CV_UNUSED(src); CV_UNUSED(srcstep);
-    CV_UNUSED(sum); CV_UNUSED(sumstep);
-    CV_UNUSED(sqsum); CV_UNUSED(sqsumstep);
-    CV_UNUSED(tilted); CV_UNUSED(tstep);
-    CV_UNUSED(width); CV_UNUSED(height); CV_UNUSED(cn);
-#endif
-    return false;
 }
 }
 #endif
@ -471,12 +456,7 @@ void integral(int depth, int sdepth, int sqdepth,
              int width, int height, int cn)
 {
    CALL_HAL(integral, cv_hal_integral, depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn);
-    CV_IPP_RUN(( depth == CV_8U )
-               && ( sdepth == CV_32F || sdepth == CV_32S )
-               && ( !tilted )
-               && ( !sqsum || sqdepth == CV_64F )
-               && ( cn == 1 ),
-               ipp_integral(depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, width, height, cn));
+    CV_IPP_RUN_FAST(ipp_integral(depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn));

 #define ONE_CALL(A, B, C) integral_<A, B, C>((const A*)src, srcstep, (B*)sum, sumstep, (C*)sqsum, sqsumstep, (B*)tilted, tstep, width, height, cn)

--- a/modules/imgproc/test/test_bilateral_filter.cpp
+++ b/modules/imgproc/test/test_bilateral_filter.cpp
@ -251,20 +251,23 @@ namespace cvtest

    int CV_BilateralFilterTest::validate_test_results(int test_case_index)
    {
-        static const double eps = 4;
-
+        double eps = (_src.depth() < CV_32F)?1:5e-3;
+        double e;
        Mat reference_dst, reference_src;
        if (_src.depth() == CV_32F)
+        {
            reference_bilateral_filter(_src, reference_dst, _d, _sigma_color, _sigma_space);
+            e = cvtest::norm(reference_dst, _parallel_dst, NORM_INF|NORM_RELATIVE);
+        }
        else
        {
            int type = _src.type();
            _src.convertTo(reference_src, CV_32F);
            reference_bilateral_filter(reference_src, reference_dst, _d, _sigma_color, _sigma_space);
            reference_dst.convertTo(reference_dst, type);
+            e = cvtest::norm(reference_dst, _parallel_dst, NORM_INF);
        }

-        double e = cvtest::norm(reference_dst, _parallel_dst, NORM_L2);
        if (e > eps)
        {
            ts->printf(cvtest::TS::CONSOLE, "actual error: %g, expected: %g", e, eps);
--- a/modules/imgproc/test/test_houghLines.cpp
+++ b/modules/imgproc/test/test_houghLines.cpp
@ -189,7 +189,7 @@ void BaseHoughLineTest::run_test(int type)
    else if (type == PROBABILISTIC)
        count = countMatIntersection<Vec4i>(exp_lines, lines, 1e-4f, 0.f);

-#if defined HAVE_IPP && IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK
+#if defined HAVE_IPP && IPP_VERSION_X100 >= 810 && !IPP_DISABLE_HOUGH
    EXPECT_GE( count, (int) (exp_lines.total() * 0.8) );
 #else
    EXPECT_EQ( count, (int) exp_lines.total());
--- a/modules/objdetect/src/cascadedetect.cpp
+++ b/modules/objdetect/src/cascadedetect.cpp
@ -484,6 +484,8 @@ bool FeatureEvaluator::updateScaleData( Size imgsz, const std::vector<float>& _s

 bool FeatureEvaluator::setImage( InputArray _image, const std::vector<float>& _scales )
 {
+    CV_INSTRUMENT_REGION()
+
    Size imgsz = _image.size();
    bool recalcOptFeatures = updateScaleData(imgsz, _scales);

@ -628,6 +630,8 @@ Ptr<FeatureEvaluator> HaarEvaluator::clone() const

 void HaarEvaluator::computeChannels(int scaleIdx, InputArray img)
 {
+    CV_INSTRUMENT_REGION()
+
    const ScaleData& s = scaleData->at(scaleIdx);
    sqofs = hasTiltedFeatures ? sbufSize.area() * 2 : sbufSize.area();

@ -670,6 +674,8 @@ void HaarEvaluator::computeChannels(int scaleIdx, InputArray img)

 void HaarEvaluator::computeOptFeatures()
 {
+    CV_INSTRUMENT_REGION()
+
    if (hasTiltedFeatures)
        tofs = sbufSize.area();

@ -916,6 +922,8 @@ void CascadeClassifierImpl::read(const FileNode& node)

 int CascadeClassifierImpl::runAt( Ptr<FeatureEvaluator>& evaluator, Point pt, int scaleIdx, double& weight )
 {
+    CV_INSTRUMENT_REGION()
+
    assert( !oldCascade &&
           (data.featureType == FeatureEvaluator::HAAR ||
            data.featureType == FeatureEvaluator::LBP ||
@ -984,6 +992,8 @@ public:

    void operator()(const Range& range) const
    {
+        CV_INSTRUMENT_REGION()
+
        Ptr<FeatureEvaluator> evaluator = classifier->featureEvaluator->clone();
        double gypWeight = 0.;
        Size origWinSize = classifier->data.origWinSize;
--- a/modules/objdetect/src/cascadedetect.hpp
+++ b/modules/objdetect/src/cascadedetect.hpp
@ -489,6 +489,8 @@ template<class FEval>
 inline int predictOrdered( CascadeClassifierImpl& cascade,
                           Ptr<FeatureEvaluator> &_featureEvaluator, double& sum )
 {
+    CV_INSTRUMENT_REGION()
+
    int nstages = (int)cascade.data.stages.size();
    int nodeOfs = 0, leafOfs = 0;
    FEval& featureEvaluator = (FEval&)*_featureEvaluator;
@ -529,6 +531,8 @@ template<class FEval>
 inline int predictCategorical( CascadeClassifierImpl& cascade,
                               Ptr<FeatureEvaluator> &_featureEvaluator, double& sum )
 {
+    CV_INSTRUMENT_REGION()
+
    int nstages = (int)cascade.data.stages.size();
    int nodeOfs = 0, leafOfs = 0;
    FEval& featureEvaluator = (FEval&)*_featureEvaluator;
@ -571,6 +575,8 @@ template<class FEval>
 inline int predictOrderedStump( CascadeClassifierImpl& cascade,
                                Ptr<FeatureEvaluator> &_featureEvaluator, double& sum )
 {
+    CV_INSTRUMENT_REGION()
+
    CV_Assert(!cascade.data.stumps.empty());
    FEval& featureEvaluator = (FEval&)*_featureEvaluator;
    const CascadeClassifierImpl::Data::Stump* cascadeStumps = &cascade.data.stumps[0];
@ -608,6 +614,8 @@ template<class FEval>
 inline int predictCategoricalStump( CascadeClassifierImpl& cascade,
                                    Ptr<FeatureEvaluator> &_featureEvaluator, double& sum )
 {
+    CV_INSTRUMENT_REGION()
+
    CV_Assert(!cascade.data.stumps.empty());
    int nstages = (int)cascade.data.stages.size();
    FEval& featureEvaluator = (FEval&)*_featureEvaluator;
--- a/modules/objdetect/src/haar.cpp
+++ b/modules/objdetect/src/haar.cpp
@ -340,8 +340,8 @@ icvCreateHidHaarClassifierCascade( CvHaarClassifierCascade* cascade )
            out->isStumpBased &= node_count == 1;
        }
    }
-/*
-#ifdef HAVE_IPP
+
+#if defined HAVE_IPP && !IPP_DISABLE_HAAR
    int can_use_ipp = CV_IPP_CHECK_COND && (!out->has_tilted_features && !out->is_tree && out->isStumpBased);

    if( can_use_ipp )
@ -396,7 +396,7 @@ icvCreateHidHaarClassifierCascade( CvHaarClassifierCascade* cascade )
        }
    }
 #endif
-*/
+
    cascade->hid_cascade = out;
    assert( (char*)haar_node_ptr - (char*)out <= datasize );