Merge remote-tracking branch 'upstream/3.4' into merge-3.4

7 years ago · 1913482cf5
parent c7d04c6c5c 56eebb926d
commit 1913482cf5
90 changed files with 6204 additions and 6600 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -205,16 +205,17 @@ endif()
 OCV_OPTION(OPENCV_ENABLE_NONFREE "Enable non-free algorithms" OFF)

 # 3rd party libs
-OCV_OPTION(BUILD_ZLIB               "Build zlib from source"             WIN32 OR APPLE)
-OCV_OPTION(BUILD_TIFF               "Build libtiff from source"          WIN32 OR ANDROID OR APPLE)
-OCV_OPTION(BUILD_JASPER             "Build libjasper from source"        WIN32 OR ANDROID OR APPLE)
-OCV_OPTION(BUILD_JPEG               "Build libjpeg from source"          WIN32 OR ANDROID OR APPLE)
-OCV_OPTION(BUILD_PNG                "Build libpng from source"           WIN32 OR ANDROID OR APPLE)
-OCV_OPTION(BUILD_OPENEXR            "Build openexr from source"          (WIN32 OR ANDROID OR APPLE) AND NOT WINRT)
-OCV_OPTION(BUILD_WEBP               "Build WebP from source"             (WIN32 OR ANDROID OR APPLE) AND NOT WINRT)
-OCV_OPTION(BUILD_TBB                "Download and build TBB from source" ANDROID )
-OCV_OPTION(BUILD_IPP_IW             "Build IPP IW from source"           NOT MINGW IF (X86_64 OR X86) AND NOT WINRT )
-OCV_OPTION(BUILD_ITT                "Build Intel ITT from source"        NOT MINGW IF (X86_64 OR X86) AND NOT WINRT AND NOT APPLE_FRAMEWORK )
+OCV_OPTION(OPENCV_FORCE_3RDPARTY_BUILD   "Force using 3rdparty code from source" OFF)
+OCV_OPTION(BUILD_ZLIB               "Build zlib from source"             (WIN32 OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) )
+OCV_OPTION(BUILD_TIFF               "Build libtiff from source"          (WIN32 OR ANDROID OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) )
+OCV_OPTION(BUILD_JASPER             "Build libjasper from source"        (WIN32 OR ANDROID OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) )
+OCV_OPTION(BUILD_JPEG               "Build libjpeg from source"          (WIN32 OR ANDROID OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) )
+OCV_OPTION(BUILD_PNG                "Build libpng from source"           (WIN32 OR ANDROID OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) )
+OCV_OPTION(BUILD_OPENEXR            "Build openexr from source"          (((WIN32 OR ANDROID OR APPLE) AND NOT WINRT) OR OPENCV_FORCE_3RDPARTY_BUILD) )
+OCV_OPTION(BUILD_WEBP               "Build WebP from source"             (((WIN32 OR ANDROID OR APPLE) AND NOT WINRT) OR OPENCV_FORCE_3RDPARTY_BUILD) )
+OCV_OPTION(BUILD_TBB                "Download and build TBB from source" (ANDROID OR OPENCV_FORCE_3RDPARTY_BUILD) )
+OCV_OPTION(BUILD_IPP_IW             "Build IPP IW from source"           (NOT MINGW OR OPENCV_FORCE_3RDPARTY_BUILD) IF (X86_64 OR X86) AND NOT WINRT )
+OCV_OPTION(BUILD_ITT                "Build Intel ITT from source"        (NOT MINGW OR OPENCV_FORCE_3RDPARTY_BUILD) IF (X86_64 OR X86) AND NOT WINRT AND NOT APPLE_FRAMEWORK )

 # Optional 3rd party components
 # ===================================================
@ -339,6 +340,7 @@ OCV_OPTION(ENABLE_BUILD_HARDENING     "Enable hardening of the resulting binarie
 OCV_OPTION(ENABLE_LTO                 "Enable Link Time Optimization" OFF IF CV_GCC OR MSVC)
 OCV_OPTION(ENABLE_THIN_LTO            "Enable Thin LTO" OFF IF CV_CLANG)
 OCV_OPTION(GENERATE_ABI_DESCRIPTOR    "Generate XML file for abi_compliance_checker tool" OFF IF UNIX)
+OCV_OPTION(OPENCV_GENERATE_PKGCONFIG  "Generate .pc file for pkg-config build tool (deprecated)" ON IF (UNIX AND NOT MSVC AND NOT IOS AND NOT ANDROID) )
 OCV_OPTION(CV_ENABLE_INTRINSICS       "Use intrinsic-based optimized code" ON )
 OCV_OPTION(CV_DISABLE_OPTIMIZATION    "Disable explicit optimized code (dispatched code/intrinsics/loop unrolling/etc)" OFF )
 OCV_OPTION(CV_TRACE                   "Enable OpenCV code trace" ON)
@ -856,6 +858,7 @@ include(cmake/OpenCVGenHeaders.cmake)

 # Generate opencv.pc for pkg-config command
 if(NOT OPENCV_SKIP_PKGCONFIG_GENERATION
+    AND OPENCV_GENERATE_PKGCONFIG
    AND NOT CMAKE_GENERATOR MATCHES "Xcode")
  include(cmake/OpenCVGenPkgconfig.cmake)
 endif()
--- a/cmake/OpenCVUtils.cmake
+++ b/cmake/OpenCVUtils.cmake
@ -605,11 +605,13 @@ macro(OCV_OPTION variable description value)
      option(${variable} "${description}" ${__value})
    endif()
  else()
-    if(DEFINED ${variable})
-      # TODO: message(WARNING "Option will be ignored: ${variable} (=${${variable}})")
+    if(DEFINED ${variable} AND NOT OPENCV_HIDE_WARNING_UNSUPPORTED_OPTION)
+      message(WARNING "Unexpected option: ${variable} (=${${variable}})\nCondition: IF (${__condition})")
    endif()
+    if(OPENCV_UNSET_UNSUPPORTED_OPTION)
      unset(${variable} CACHE)
    endif()
+  endif()
  unset(__condition)
  unset(__value)
 endmacro()
--- a/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown
+++ b/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown
@ -81,8 +81,8 @@ points.
 Now an orientation is assigned to each keypoint to achieve invariance to image rotation. A
 neighbourhood is taken around the keypoint location depending on the scale, and the gradient
 magnitude and direction is calculated in that region. An orientation histogram with 36 bins covering
-360 degrees is created. (It is weighted by gradient magnitude and gaussian-weighted circular window
-with \f$\sigma\f$ equal to 1.5 times the scale of keypoint. The highest peak in the histogram is taken
+360 degrees is created (It is weighted by gradient magnitude and gaussian-weighted circular window
+with \f$\sigma\f$ equal to 1.5 times the scale of keypoint). The highest peak in the histogram is taken
 and any peak above 80% of it is also considered to calculate the orientation. It creates keypoints
 with same location and scale, but different directions. It contribute to stability of matching.

@ -99,7 +99,7 @@ illumination changes, rotation etc.
 Keypoints between two images are matched by identifying their nearest neighbours. But in some cases,
 the second closest-match may be very near to the first. It may happen due to noise or some other
 reasons. In that case, ratio of closest-distance to second-closest distance is taken. If it is
-greater than 0.8, they are rejected. It eliminaters around 90% of false matches while discards only
+greater than 0.8, they are rejected. It eliminates around 90% of false matches while discarding only
 5% correct matches, as per the paper.

 So this is a summary of SIFT algorithm. For more details and understanding, reading the original
--- a/doc/py_tutorials/py_video/py_bg_subtraction/py_bg_subtraction.markdown
+++ b/doc/py_tutorials/py_video/py_bg_subtraction/py_bg_subtraction.markdown
@ -20,7 +20,7 @@ extract the moving foreground from static background.
 If you have an image of background alone, like an image of the room without visitors, image of the road
 without vehicles etc, it is an easy job. Just subtract the new image from the background. You get
 the foreground objects alone. But in most of the cases, you may not have such an image, so we need
-to extract the background from whatever images we have. It become more complicated when there are
+to extract the background from whatever images we have. It becomes more complicated when there are
 shadows of the vehicles. Since shadows also move, simple subtraction will mark that also as
 foreground. It complicates things.

@ -72,7 +72,7 @@ papers by Z.Zivkovic, "Improved adaptive Gaussian mixture model for background s
 and "Efficient Adaptive Density Estimation per Image Pixel for the Task of Background Subtraction"
 in 2006. One important feature of this algorithm is that it selects the appropriate number of
 gaussian distribution for each pixel. (Remember, in last case, we took a K gaussian distributions
-throughout the algorithm). It provides better adaptibility to varying scenes due illumination
+throughout the algorithm). It provides better adaptability to varying scenes due to illumination
 changes etc.

 As in previous case, we have to create a background subtractor object. Here, you have an option of
--- a/doc/py_tutorials/py_video/py_lucas_kanade/py_lucas_kanade.markdown
+++ b/doc/py_tutorials/py_video/py_lucas_kanade/py_lucas_kanade.markdown
@ -75,10 +75,10 @@ solution.
 ( Check similarity of inverse matrix with Harris corner detector. It denotes that corners are better
 points to be tracked.)

-So from user point of view, idea is simple, we give some points to track, we receive the optical
+So from the user's point of view, the idea is simple: we give some points to track, and we receive the optical
 flow vectors of those points. But again there are some problems. Until now, we were dealing with
-small motions. So it fails when there is large motion. So again we go for pyramids. When we go up in
-the pyramid, small motions are removed and large motions becomes small motions. So applying
+small motions, so it fails when there is a large motion. To deal with this we use pyramids. When we go up in
+the pyramid, small motions are removed and large motions become small motions. So by applying
 Lucas-Kanade there, we get optical flow along with the scale.

 Lucas-Kanade Optical Flow in OpenCV
--- a/doc/tutorials/core/how_to_scan_images/how_to_scan_images.markdown
+++ b/doc/tutorials/core/how_to_scan_images/how_to_scan_images.markdown
@ -69,7 +69,7 @@ to an integer format. Then we use a simple look and the upper formula to calcula
 No OpenCV specific stuff here.

 Another issue is how do we measure time? Well OpenCV offers two simple functions to achieve this
-@ref cv::getTickCount() and @ref cv::getTickFrequency() . The first returns the number of ticks of
+cv::getTickCount() and cv::getTickFrequency(). The first returns the number of ticks of
 your systems CPU from a certain event (like since you booted your system). The second returns how
 many times your CPU emits a tick during a second. So to measure in seconds the number of time
 elapsed between two operations is easy as:
@ -98,7 +98,7 @@ example in case of an BGR color system:
 Note that the order of the channels is inverse: BGR instead of RGB. Because in many cases the memory
 is large enough to store the rows in a successive fashion the rows may follow one after another,
 creating a single long row. Because everything is in a single place following one after another this
-may help to speed up the scanning process. We can use the @ref cv::Mat::isContinuous() function to *ask*
+may help to speed up the scanning process. We can use the cv::Mat::isContinuous() function to *ask*
 the matrix if this is the case. Continue on to the next section to find an example.

 The efficient way
@ -155,7 +155,7 @@ elements in the image. Its basic usage is to specify the row and column number o
 to access. During our earlier scanning methods you could already observe that is important through
 what type we are looking at the image. It's no different here as you need to manually specify what
 type to use at the automatic lookup. You can observe this in case of the gray scale images for the
-following source code (the usage of the + @ref cv::at() function):
+following source code (the usage of the cv::Mat::at() function):

@snippet how_to_scan_images.cpp scan-random

@ -169,12 +169,12 @@ new row pointer for what we use the C operator[] to acquire the column element.

 If you need to do multiple lookups using this method for an image it may be troublesome and time
 consuming to enter the type and the at keyword for each of the accesses. To solve this problem
-OpenCV has a @ref cv::Mat_ data type. It's the same as Mat with the extra need that at definition
+OpenCV has a cv::Mat_ data type. It's the same as Mat with the extra need that at definition
 you need to specify the data type through what to look at the data matrix, however in return you can
 use the operator() for fast access of items. To make things even better this is easily convertible
-from and to the usual @ref cv::Mat data type. A sample usage of this you can see in case of the
+from and to the usual cv::Mat data type. A sample usage of this you can see in case of the
 color images of the upper function. Nevertheless, it's important to note that the same operation
-(with the same runtime speed) could have been done with the @ref cv::at() function. It's just a less
+(with the same runtime speed) could have been done with the cv::Mat::at function. It's just a less
 to write for the lazy programmer trick.

 The Core Function
@ -183,7 +183,7 @@ The Core Function
 This is a bonus method of achieving lookup table modification in an image. In image
 processing it's quite common that you want to modify all of a given image values to some other value.
 OpenCV provides a function for modifying image values, without the need to write the scanning logic
-of the image. We use the @ref cv::LUT() function of the core module. First we build a Mat type of the
+of the image. We use the cv::LUT() function of the core module. First we build a Mat type of the
 lookup table:

@snippet how_to_scan_images.cpp table-init
--- a/modules/calib3d/src/calibinit.cpp
+++ b/modules/calib3d/src/calibinit.cpp
@ -2228,13 +2228,13 @@ bool findCirclesGrid( InputArray _image, Size patternSize,
      void* oldCbkData;
      ErrorCallback oldCbk = redirectError(quiet_error, 0, &oldCbkData); // FIXIT not thread safe
 #endif
-      CV_TRY
+      try
      {
        isFound = boxFinder.findHoles();
      }
-      CV_CATCH(Exception, e)
+      catch (const cv::Exception &)
      {
-          CV_UNUSED(e);
+
      }
 #if BE_QUIET
      redirectError(oldCbk, oldCbkData);
--- a/modules/core/CMakeLists.txt
+++ b/modules/core/CMakeLists.txt
@ -2,6 +2,7 @@ set(the_description "The Core Functionality")

 ocv_add_dispatched_file(mathfuncs_core SSE2 AVX AVX2)
 ocv_add_dispatched_file(stat SSE4_2 AVX2)
+ocv_add_dispatched_file(arithm SSE2 SSE4_1 AVX2)

 # dispatching for accuracy tests
 ocv_add_dispatched_file_force_all(test_intrin128 TEST SSE2 SSE3 SSSE3 SSE4_1 SSE4_2 AVX FP16 AVX2)
--- a/modules/core/include/opencv2/core/cvdef.h
+++ b/modules/core/include/opencv2/core/cvdef.h
@ -152,20 +152,6 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard

 #define CV_UNUSED(name) (void)name

-#if defined __GNUC__ && !defined __EXCEPTIONS
-#define CV_TRY
-#define CV_CATCH(A, B) for (A B; false; )
-#define CV_CATCH_ALL if (false)
-#define CV_THROW(A) abort()
-#define CV_RETHROW() abort()
-#else
-#define CV_TRY try
-#define CV_CATCH(A, B) catch(const A & B)
-#define CV_CATCH_ALL catch(...)
-#define CV_THROW(A) throw A
-#define CV_RETHROW() throw
-#endif
-
 //! @endcond

 // undef problematic defines sometimes defined by system headers (windows.h in particular)
--- a/modules/core/include/opencv2/core/hal/intrin_avx.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_avx.hpp
@ -661,7 +661,7 @@ inline v_uint8x32 operator * (const v_uint8x32& a, const v_uint8x32& b)
 {
    v_uint16x16 c, d;
    v_mul_expand(a, b, c, d);
-    return v_pack_u(v_reinterpret_as_s16(c), v_reinterpret_as_s16(d));
+    return v_pack(c, d);
 }
 inline v_int8x32 operator * (const v_int8x32& a, const v_int8x32& b)
 {
@ -1291,6 +1291,16 @@ inline v_float32x8 v_absdiff(const v_float32x8& a, const v_float32x8& b)
 inline v_float64x4 v_absdiff(const v_float64x4& a, const v_float64x4& b)
 { return v_abs(a - b); }

+/** Saturating absolute difference
+ *
+ *  v_absdiffs(a, b) for signed 8-bit (v_int8x32) and signed 16-bit (v_int16x16)
+ *  lanes. The result has the same signed type as the inputs. Both overloads
+ *  are built only from the vector operators and v_min / v_max.
+ *  NOTE(review): the result saturates only if `operator -` on these signed
+ *  types is a saturating subtract - confirm against its definition, which is
+ *  outside this hunk.
+ **/
+inline v_int8x32 v_absdiffs(const v_int8x32& a, const v_int8x32& b)
+{
+    v_int8x32 d = a - b;   // per-lane difference
+    v_int8x32 m = a < b;   // comparison result; presumably all ones in lanes where a < b, zero elsewhere
+    return (d ^ m) - m;    // with an all-ones m this is ~d + 1, i.e. -d; with a zero m it is d unchanged
+}
+inline v_int16x16 v_absdiffs(const v_int16x16& a, const v_int16x16& b)
+{ return v_max(a, b) - v_min(a, b); }   // v_max minus v_min of each lane pair
+
 ////////// Conversions /////////

 /** Rounding **/
@ -1300,6 +1310,12 @@ inline v_int32x8 v_round(const v_float32x8& a)
 inline v_int32x8 v_round(const v_float64x4& a)
 { return v_int32x8(_mm256_castsi128_si256(_mm256_cvtpd_epi32(a.val))); }

+inline v_int32x8 v_round(const v_float64x4& a, const v_float64x4& b)   // two 4 x double vectors in, one 8 x int32 vector out
+{   // _mm256_cvtpd_epi32 converts the four doubles of one input to four int32 held in a __m128i
+    __m128i ai = _mm256_cvtpd_epi32(a.val), bi = _mm256_cvtpd_epi32(b.val);
+    return v_int32x8(_v256_combine(ai, bi));   // joins the two 128-bit halves; presumably ai low and bi high - confirm in _v256_combine
+}
+
 inline v_int32x8 v_trunc(const v_float32x8& a)
 { return v_int32x8(_mm256_cvttps_epi32(a.val)); }

@ -1689,6 +1705,40 @@ void v_rshr_pack_store(int* ptr, const v_int64x4& a)
    v_pack_store(ptr, (a + delta) >> n);
 }

+/* pack boolean
+ *
+ * v_pack_b narrows 16-, 32- or 64-bit mask lanes taken from 2, 4 or 8 input
+ * vectors into one vector of 8-bit lanes. Narrowing is done with the signed
+ * saturating pack instructions, so an all-ones lane (-1 as a signed value)
+ * stays all ones and a zero lane stays zero; other lane values are not
+ * preserved. The 256-bit pack instructions work on each 128-bit half
+ * separately, which is why every overload follows the packs with shuffles.
+ * NOTE(review): the shuffles presumably restore a, b, c, ... byte order;
+ * _v256_shuffle_odd_64 is defined elsewhere - confirm.
+ */
+inline v_uint8x32 v_pack_b(const v_uint16x16& a, const v_uint16x16& b)
+{
+    __m256i ab = _mm256_packs_epi16(a.val, b.val);   // 16-bit -> 8-bit, per 128-bit half
+    return v_uint8x32(_v256_shuffle_odd_64(ab));
+}
+
+inline v_uint8x32 v_pack_b(const v_uint32x8& a, const v_uint32x8& b,
+                           const v_uint32x8& c, const v_uint32x8& d)
+{
+    __m256i ab = _mm256_packs_epi32(a.val, b.val);   // 32-bit -> 16-bit
+    __m256i cd = _mm256_packs_epi32(c.val, d.val);
+
+    __m256i abcd = _v256_shuffle_odd_64(_mm256_packs_epi16(ab, cd));   // 16-bit -> 8-bit, then reorder 64-bit blocks
+    return v_uint8x32(_mm256_shuffle_epi32(abcd, _MM_SHUFFLE(3, 1, 2, 0)));   // swaps the two middle 32-bit elements of each 128-bit half
+}
+
+inline v_uint8x32 v_pack_b(const v_uint64x4& a, const v_uint64x4& b, const v_uint64x4& c,
+                           const v_uint64x4& d, const v_uint64x4& e, const v_uint64x4& f,
+                           const v_uint64x4& g, const v_uint64x4& h)
+{
+    __m256i ab = _mm256_packs_epi32(a.val, b.val);   // each 64-bit mask lane is treated as two 32-bit lanes here
+    __m256i cd = _mm256_packs_epi32(c.val, d.val);
+    __m256i ef = _mm256_packs_epi32(e.val, f.val);
+    __m256i gh = _mm256_packs_epi32(g.val, h.val);
+
+    __m256i abcd = _mm256_packs_epi32(ab, cd);   // second round: pairs of 16-bit values packed as 32-bit lanes
+    __m256i efgh = _mm256_packs_epi32(ef, gh);
+    __m256i pkall = _v256_shuffle_odd_64(_mm256_packs_epi16(abcd, efgh));   // final 16-bit -> 8-bit narrowing
+
+    __m256i rev = _mm256_alignr_epi8(pkall, pkall, 8);   // pkall with each 128-bit half rotated by 8 bytes
+    return v_uint8x32(_mm256_unpacklo_epi16(pkall, rev));   // interleaves 16-bit pairs of pkall and rev within each half
+}
+
 /* Recombine */
 // its up there with load and store operations

--- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp
@ -109,7 +109,7 @@ These operations allow to reorder or recombine elements in one or multiple vecto

 - Interleave, deinterleave (2, 3 and 4 channels): @ref v_load_deinterleave, @ref v_store_interleave
 - Expand: @ref v_load_expand, @ref v_load_expand_q, @ref v_expand, @ref v_expand_low, @ref v_expand_high
- Pack: @ref v_pack, @ref v_pack_u, @ref v_rshr_pack, @ref v_rshr_pack_u,
+- Pack: @ref v_pack, @ref v_pack_u, @ref v_pack_b, @ref v_rshr_pack, @ref v_rshr_pack_u,
@ref v_pack_store, @ref v_pack_u_store, @ref v_rshr_pack_store, @ref v_rshr_pack_u_store
 - Recombine: @ref v_zip, @ref v_recombine, @ref v_combine_low, @ref v_combine_high
 - Extract: @ref v_extract
@ -159,7 +159,7 @@ Most of these operations return only one value.
 ### Other math

 - Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude
- Absolute values: @ref v_abs, @ref v_absdiff
+- Absolute values: @ref v_abs, @ref v_absdiff, @ref v_absdiffs

 ### Conversions

@ -199,10 +199,12 @@ Regular integers:
 |logical            | x | x | x | x | x | x |
 |min, max           | x | x | x | x | x | x |
 |absdiff            | x | x | x | x | x | x |
+|absdiffs           |   | x |   | x |   |   |
 |reduce             |   |   |   |   | x | x |
 |mask               | x | x | x | x | x | x |
 |pack               | x | x | x | x | x | x |
 |pack_u             | x |   | x |   |   |   |
+|pack_b             | x |   |   |   |   |   |
 |unpack             | x | x | x | x | x | x |
 |extract            | x | x | x | x | x | x |
 |rotate (lanes)     | x | x | x | x | x | x |
@ -762,6 +764,19 @@ inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b)
    return c;
 }

+/** @brief Saturating absolute difference
+
+Returns \f$ saturate(|a - b|) \f$ .
+For 8-, 16-bit signed integer source types.
+
+The result has the same lane type and lane count as the inputs. Each lane is
+computed independently: the difference is passed through std::abs and then
+converted back to the lane type with saturate_cast. */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_absdiffs(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    v_reg<_Tp, n> c;
+    for( int i = 0; i < n; i++)
+        c.s[i] = saturate_cast<_Tp>(std::abs(a.s[i] - b.s[i]));   // for 8-/16-bit lanes the subtraction is done in int (integer promotion)
+    return c;
+}
+
 /** @brief Inversed square root

 Returns \f$ 1/sqrt(a) \f$
@ -1613,6 +1628,18 @@ template<int n> inline v_reg<int, n> v_round(const v_reg<float, n>& a)
    return c;
 }

+/** @overload
+
+Rounds two double vectors into a single int vector with twice as many lanes:
+lanes [0, n) hold cvRound of the lanes of a, lanes [n, 2n) hold cvRound of the
+lanes of b. */
+template<int n> inline v_reg<int, n*2> v_round(const v_reg<double, n>& a, const v_reg<double, n>& b)
+{
+    v_reg<int, n*2> c;
+    for( int i = 0; i < n; i++ )
+    {
+        c.s[i] = cvRound(a.s[i]);     // first half of the result comes from a
+        c.s[i+n] = cvRound(b.s[i]);   // second half comes from b
+    }
+    return c;
+}
+
 /** @brief Floor

 Floor each value. Input type is float vector ==> output type is int vector.*/
@ -2059,6 +2086,103 @@ OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, s
 OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast)
 //! @}

+//! @cond IGNORED
+template<typename _Tpm, typename _Tp, int n>
+inline void _pack_b(_Tpm* mptr, const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)   // helper: writes 2*n values to mptr, a's lanes first, then b's
+{
+    for (int i = 0; i < n; ++i)
+    {
+        mptr[i] = (_Tpm)a.s[i];       // plain cast to the destination type, no saturation
+        mptr[i + n] = (_Tpm)b.s[i];   // b's lanes start n elements after a's
+    }
+}
+//! @endcond
+
+//! @name Pack boolean values
+//! @{
+//! @brief Pack boolean values from multiple vectors to one unsigned 8-bit integer vector
+//!
+//! @note Must provide valid boolean values to guarantee same result for all architectures.
+
+/** @brief Pack 16-bit boolean values
+
+For 16-bit boolean values: the 8 lanes of a fill bytes 0-7 of the result and
+the 8 lanes of b fill bytes 8-15, each lane cast to an unsigned 8-bit value.
+
+Scheme:
+@code
+a  {0xFFFF 0 0 0xFFFF 0 0xFFFF 0xFFFF 0}
+b  {0xFFFF 0 0xFFFF 0 0 0xFFFF 0 0xFFFF}
+===============
+{
+   0xFF 0 0 0xFF 0 0xFF 0xFF 0
+   0xFF 0 0xFF 0 0 0xFF 0 0xFF
+}
+@endcode */
+
+inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
+{
+    v_uint8x16 mask;
+    _pack_b(mask.s, a, b);   // fills all 16 bytes: 8 from a, then 8 from b
+    return mask;
+}
+
+/** @overload
+For 32-bit boolean values
+
+Result bytes 0-3 come from a, 4-7 from b, 8-11 from c and 12-15 from d,
+each lane cast to an unsigned 8-bit value.
+
+Scheme:
+@code
+a  {0xFFFF.. 0 0 0xFFFF..}
+b  {0 0xFFFF.. 0xFFFF.. 0}
+c  {0xFFFF.. 0 0xFFFF.. 0}
+d  {0 0xFFFF.. 0 0xFFFF..}
+===============
+{
+   0xFF 0 0 0xFF 0 0xFF 0xFF 0
+   0xFF 0 0xFF 0 0 0xFF 0 0xFF
+}
+@endcode */
+
+inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
+                           const v_uint32x4& c, const v_uint32x4& d)
+{
+    v_uint8x16 mask;
+    _pack_b(mask.s, a, b);       // bytes 0-7
+    _pack_b(mask.s + 8, c, d);   // bytes 8-15
+    return mask;
+}
+
+/** @overload
+For 64-bit boolean values
+
+Each input contributes two bytes, in the order a, b, c, d, e, f, g, h,
+each lane cast to an unsigned 8-bit value.
+
+Scheme:
+@code
+a  {0xFFFF.. 0}
+b  {0 0xFFFF..}
+c  {0xFFFF.. 0}
+d  {0 0xFFFF..}
+
+e  {0xFFFF.. 0}
+f  {0xFFFF.. 0}
+g  {0 0xFFFF..}
+h  {0 0xFFFF..}
+===============
+{
+   0xFF 0 0 0xFF 0xFF 0 0 0xFF
+   0xFF 0 0xFF 0 0 0xFF 0 0xFF
+}
+@endcode */
+inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
+                           const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
+                           const v_uint64x2& g, const v_uint64x2& h)
+{
+    v_uint8x16 mask;
+    _pack_b(mask.s, a, b);        // bytes 0-3
+    _pack_b(mask.s + 4, c, d);    // bytes 4-7
+    _pack_b(mask.s + 8, e, f);    // bytes 8-11
+    _pack_b(mask.s + 12, g, h);   // bytes 12-15
+    return mask;
+}
+//! @}
+
 /** @brief Matrix multiplication

 Scheme:
--- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp
@ -394,6 +394,35 @@ OPENCV_HAL_IMPL_NEON_PACK(v_int32x4, int, int32x2_t, s32, v_int64x2, pack, vmovn
 OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_int16x8, pack_u, vqmovun_s16, vqrshrun_n_s16)
 OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_int32x4, pack_u, vqmovun_s32, vqrshrun_n_s32)

+/* pack boolean
+ *
+ * v_pack_b narrows 16-, 32- or 64-bit mask lanes taken from 2, 4 or 8 input
+ * vectors into one 16 x 8-bit vector. Each step halves the lane width with
+ * vmovn_* and glues two narrowed halves together with vcombine_*, so bytes
+ * appear in argument order (a, b, c, ...).
+ * NOTE(review): vmovn_* is the non-saturating narrowing move per the Arm
+ * intrinsics reference, so only all-ones / all-zero lanes are meaningful
+ * inputs - confirm.
+ */
+inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
+{
+    uint8x16_t ab = vcombine_u8(vmovn_u16(a.val), vmovn_u16(b.val));   // a -> low 8 bytes, b -> high 8 bytes
+    return v_uint8x16(ab);
+}
+
+inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
+                           const v_uint32x4& c, const v_uint32x4& d)
+{
+    uint16x8_t nab = vcombine_u16(vmovn_u32(a.val), vmovn_u32(b.val));   // 32-bit -> 16-bit
+    uint16x8_t ncd = vcombine_u16(vmovn_u32(c.val), vmovn_u32(d.val));
+    return v_uint8x16(vcombine_u8(vmovn_u16(nab), vmovn_u16(ncd)));      // 16-bit -> 8-bit
+}
+
+inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
+                           const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
+                           const v_uint64x2& g, const v_uint64x2& h)
+{
+    uint32x4_t ab = vcombine_u32(vmovn_u64(a.val), vmovn_u64(b.val));   // 64-bit -> 32-bit
+    uint32x4_t cd = vcombine_u32(vmovn_u64(c.val), vmovn_u64(d.val));
+    uint32x4_t ef = vcombine_u32(vmovn_u64(e.val), vmovn_u64(f.val));
+    uint32x4_t gh = vcombine_u32(vmovn_u64(g.val), vmovn_u64(h.val));
+
+    uint16x8_t abcd = vcombine_u16(vmovn_u32(ab), vmovn_u32(cd));       // 32-bit -> 16-bit
+    uint16x8_t efgh = vcombine_u16(vmovn_u32(ef), vmovn_u32(gh));
+    return v_uint8x16(vcombine_u8(vmovn_u16(abcd), vmovn_u16(efgh)));   // 16-bit -> 8-bit
+}
+
 inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
                            const v_float32x4& m1, const v_float32x4& m2,
                            const v_float32x4& m3)
@ -748,7 +777,6 @@ OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_mul_wrap, vmulq_s8)
 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_mul_wrap, vmulq_u16)
 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_mul_wrap, vmulq_s16)

-// TODO: absdiff for signed integers
 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_absdiff, vabdq_u8)
 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_absdiff, vabdq_u16)
 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_absdiff, vabdq_u32)
@ -757,6 +785,12 @@ OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_absdiff, vabdq_f32)
 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_absdiff, vabdq_f64)
 #endif

+/** Saturating absolute difference
+ *
+ *  v_absdiffs(a, b) for signed 8- and 16-bit lanes; the result keeps the
+ *  signed input type. Implemented as vqsubq_* followed by vqabsq_*.
+ *  NOTE(review): both are the saturating ("q") forms per the Arm intrinsics
+ *  reference, which is what keeps the result inside the signed range -
+ *  confirm.
+ **/
+inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b)
+{ return v_int8x16(vqabsq_s8(vqsubq_s8(a.val, b.val))); }
+inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b)
+{ return v_int16x8(vqabsq_s16(vqsubq_s16(a.val, b.val))); }
+
 #define OPENCV_HAL_IMPL_NEON_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin) \
 inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \
 { \
@ -1242,6 +1276,11 @@ inline v_int32x4 v_round(const v_float64x2& a)
    return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), zero));
 }

+inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b)   // two 2 x double vectors in, one 4 x int32 vector out
+{   // per input: vcvtaq_s64_f64 converts to int64, vmovn_s64 narrows to int32; vcombine_s32 puts a's pair low and b's pair high
+    // NOTE(review): vcvtaq_* rounds to nearest with ties away from zero per the Arm reference - confirm it matches the other back ends
+    return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), vmovn_s64(vcvtaq_s64_f64(b.val))));
+}
+
 inline v_int32x4 v_floor(const v_float64x2& a)
 {
    static const int32x2_t zero = vdup_n_s32(0);
--- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp
@ -634,6 +634,35 @@ void v_rshr_pack_store(int* ptr, const v_int64x2& a)
    _mm_storel_epi64((__m128i*)ptr, a2);
 }

+/* pack boolean
+ *
+ * v_pack_b narrows 16-, 32- or 64-bit mask lanes taken from 2, 4 or 8 input
+ * vectors into one 16 x 8-bit vector, with bytes in argument order.
+ * Narrowing uses the signed saturating pack instructions, so an all-ones lane
+ * (-1 as a signed value) stays all ones and a zero lane stays zero; other
+ * lane values are not preserved.
+ */
+inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
+{
+    __m128i ab = _mm_packs_epi16(a.val, b.val);   // 16-bit -> 8-bit
+    return v_uint8x16(ab);
+}
+
+inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
+                           const v_uint32x4& c, const v_uint32x4& d)
+{
+    __m128i ab = _mm_packs_epi32(a.val, b.val);   // 32-bit -> 16-bit
+    __m128i cd = _mm_packs_epi32(c.val, d.val);
+    return v_uint8x16(_mm_packs_epi16(ab, cd));   // 16-bit -> 8-bit
+}
+
+inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
+                           const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
+                           const v_uint64x2& g, const v_uint64x2& h)
+{
+    __m128i ab = _mm_packs_epi32(a.val, b.val);   // each 64-bit mask lane is treated as two 32-bit lanes here
+    __m128i cd = _mm_packs_epi32(c.val, d.val);
+    __m128i ef = _mm_packs_epi32(e.val, f.val);
+    __m128i gh = _mm_packs_epi32(g.val, h.val);
+
+    __m128i abcd = _mm_packs_epi32(ab, cd);   // second round: pairs of 16-bit values packed as 32-bit lanes
+    __m128i efgh = _mm_packs_epi32(ef, gh);
+    return v_uint8x16(_mm_packs_epi16(abcd, efgh));   // final 16-bit -> 8-bit narrowing
+}
+
 inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
                            const v_float32x4& m1, const v_float32x4& m2,
                            const v_float32x4& m3)
@ -706,19 +735,11 @@ OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int64x2, _mm_sub_epi64)
    inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b)      \
    { a = a * b; return a; }

+OPENCV_HAL_IMPL_SSE_MUL_SAT(v_uint8x16, v_uint16x8)
 OPENCV_HAL_IMPL_SSE_MUL_SAT(v_int8x16,  v_int16x8)
 OPENCV_HAL_IMPL_SSE_MUL_SAT(v_uint16x8, v_uint32x4)
 OPENCV_HAL_IMPL_SSE_MUL_SAT(v_int16x8,  v_int32x4)

-inline v_uint8x16 operator * (const v_uint8x16& a, const v_uint8x16& b)
-{
-    v_uint16x8 c, d;
-    v_mul_expand(a, b, c, d);
-    return v_pack_u(v_reinterpret_as_s16(c), v_reinterpret_as_s16(d));
-}
-inline v_uint8x16& operator *= (v_uint8x16& a, const v_uint8x16& b)
-{ a = a * b; return a; }
-
 //  Multiply and expand
 inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b,
                         v_uint16x8& c, v_uint16x8& d)
@ -1045,34 +1066,43 @@ inline v_int8x16 v_mul_wrap(const v_int8x16& a, const v_int8x16& b)
    return v_reinterpret_as_s8(v_mul_wrap(v_reinterpret_as_u8(a), v_reinterpret_as_u8(b)));
 }

-#define OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16(_Tpuvec, _Tpsvec, bits, smask32) \
-inline _Tpuvec v_absdiff(const _Tpuvec& a, const _Tpuvec& b) \
-{ \
-    return _Tpuvec(_mm_add_epi##bits(_mm_subs_epu##bits(a.val, b.val), _mm_subs_epu##bits(b.val, a.val))); \
-} \
-inline _Tpuvec v_absdiff(const _Tpsvec& a, const _Tpsvec& b) \
-{ \
-    __m128i smask = _mm_set1_epi32(smask32); \
-    __m128i a1 = _mm_xor_si128(a.val, smask); \
-    __m128i b1 = _mm_xor_si128(b.val, smask); \
-    return _Tpuvec(_mm_add_epi##bits(_mm_subs_epu##bits(a1, b1), _mm_subs_epu##bits(b1, a1))); \
-}
-
-OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16(v_uint8x16, v_int8x16, 8, (int)0x80808080)
-OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16(v_uint16x8, v_int16x8, 16, (int)0x80008000)
+/** Absolute difference
+ *
+ *  v_absdiff(a, b) per lane. Every overload returns an unsigned vector, for
+ *  unsigned and signed inputs alike; the signed overloads reinterpret their
+ *  result as the unsigned type of the same width.
+ **/

+inline v_uint8x16 v_absdiff(const v_uint8x16& a, const v_uint8x16& b)
+{ return v_add_wrap(a - b,  b - a); }   // NOTE(review): relies on unsigned `-` saturating at 0 so that one term vanishes - confirm
+inline v_uint16x8 v_absdiff(const v_uint16x8& a, const v_uint16x8& b)
+{ return v_add_wrap(a - b,  b - a); }   // same scheme as the 8-bit overload
 inline v_uint32x4 v_absdiff(const v_uint32x4& a, const v_uint32x4& b)
+{ return v_max(a, b) - v_min(a, b); }
+
+inline v_uint8x16 v_absdiff(const v_int8x16& a, const v_int8x16& b)
 {
-    return v_max(a, b) - v_min(a, b);
+    v_int8x16 d = v_sub_wrap(a, b);   // difference via v_sub_wrap (presumably wrapping rather than saturating)
+    v_int8x16 m = a < b;              // comparison result; presumably all ones in lanes where a < b
+    return v_reinterpret_as_u8(v_sub_wrap(d ^ m, m));   // (d ^ m) - m negates d where m is all ones
+}
+inline v_uint16x8 v_absdiff(const v_int16x8& a, const v_int16x8& b)
+{
+    return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b)));   // max - min, bits reinterpreted as unsigned
 }
-
 inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b)
 {
-    __m128i d = _mm_sub_epi32(a.val, b.val);
-    __m128i m = _mm_cmpgt_epi32(b.val, a.val);
-    return v_uint32x4(_mm_sub_epi32(_mm_xor_si128(d, m), m));
+    v_int32x4 d = a - b;   // same three steps as the removed intrinsic version, written with vector operators
+    v_int32x4 m = a < b;
+    return v_reinterpret_as_u32((d ^ m) - m);
 }

+/** Saturating absolute difference
+ *
+ *  v_absdiffs(a, b) for signed 8- and 16-bit lanes. Unlike the v_absdiff
+ *  overloads for signed inputs, the result keeps the signed input type.
+ *  NOTE(review): the result saturates only if `operator -` on these signed
+ *  types is a saturating subtract - confirm against its definition, which is
+ *  outside this hunk.
+ **/
+inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b)
+{
+    v_int8x16 d = a - b;   // per-lane difference
+    v_int8x16 m = a < b;   // comparison result; presumably all ones in lanes where a < b, zero elsewhere
+    return (d ^ m) - m;    // with an all-ones m this is ~d + 1, i.e. -d; with a zero m it is d unchanged
+ }
+inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b)
+{ return v_max(a, b) - v_min(a, b); }   // v_max minus v_min of each lane pair
+
+
 inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
 {
    return a * b + c;
@ -1623,6 +1653,12 @@ inline v_int32x4 v_trunc(const v_float32x4& a)
 inline v_int32x4 v_round(const v_float64x2& a)
 { return v_int32x4(_mm_cvtpd_epi32(a.val)); }

+inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b)   // two 2 x double vectors in, one 4 x int32 vector out
+{   // _mm_cvtpd_epi32 leaves the two converted int32 values in the low 64 bits of its result
+    __m128i ai = _mm_cvtpd_epi32(a.val), bi = _mm_cvtpd_epi32(b.val);
+    return v_int32x4(_mm_unpacklo_epi64(ai, bi));   // low 64 bits of ai followed by low 64 bits of bi
+}
+
 inline v_int32x4 v_floor(const v_float64x2& a)
 {
    __m128i a1 = _mm_cvtpd_epi32(a.val);
--- a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp
@ -383,6 +383,35 @@ OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_int32x4, unsigned int, int,
 //OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_int64x2, unsigned long long, long long,
 //                         vec_sra, vec_packsu, vec_add, pack_u)

+// pack boolean
+inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
+{
+    vec_uchar16 ab = vec_pack(a.val, b.val);
+    return v_uint8x16(ab);
+}
+
+inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
+                           const v_uint32x4& c, const v_uint32x4& d)
+{
+    vec_ushort8 ab = vec_pack(a.val, b.val);
+    vec_ushort8 cd = vec_pack(c.val, d.val);
+    return v_uint8x16(vec_pack(ab, cd));
+}
+
+inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
+                           const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
+                           const v_uint64x2& g, const v_uint64x2& h)
+{
+    vec_uint4 ab = vec_pack(a.val, b.val);
+    vec_uint4 cd = vec_pack(c.val, d.val);
+    vec_uint4 ef = vec_pack(e.val, f.val);
+    vec_uint4 gh = vec_pack(g.val, h.val);
+
+    vec_ushort8 abcd = vec_pack(ab, cd);
+    vec_ushort8 efgh = vec_pack(ef, gh);
+    return v_uint8x16(vec_pack(abcd, efgh));
+}
+
 /* Recombine */
 template <typename _Tpvec>
 inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1)
@ -834,16 +863,27 @@ inline v_float32x4 v_abs(const v_float32x4& x)
 inline v_float64x2 v_abs(const v_float64x2& x)
 { return v_float64x2(vec_abs(x.val)); }

+/** Absolute difference **/
+// unsigned
 OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_absdiff, vec_absd)

-#define OPENCV_HAL_IMPL_VSX_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin)  \
-inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b)                       \
-{ return _Tpvec2(cast(intrin(a.val, b.val))); }
+inline v_uint8x16 v_absdiff(const v_int8x16& a, const v_int8x16& b)
+{ return v_reinterpret_as_u8(v_sub_wrap(v_max(a, b), v_min(a, b))); }
+inline v_uint16x8 v_absdiff(const v_int16x8& a, const v_int16x8& b)
+{ return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))); }
+inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b)
+{ return v_reinterpret_as_u32(v_max(a, b) - v_min(a, b)); }

-OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int8x16, v_uint8x16, vec_uchar16_c, v_absdiff, vec_absd)
-OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int16x8, v_uint16x8, vec_ushort8_c, v_absdiff, vec_absd)
-OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int32x4, v_uint32x4, vec_uint4_c, v_absdiff, vec_absd)
-OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int64x2, v_uint64x2, vec_udword2_c, v_absdiff, vec_absd)
+inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b)
+{ return v_abs(a - b); }
+inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b)
+{ return v_abs(a - b); }
+
+/** Saturating absolute difference for signed integers **/
+inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b)
+{ return v_int8x16(vec_abss(vec_subs(a.val, b.val))); }
+inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b)
+{ return v_int16x8(vec_abss(vec_subs(a.val, b.val))); }

 ////////// Conversions /////////

@ -854,6 +894,9 @@ inline v_int32x4 v_round(const v_float32x4& a)
 inline v_int32x4 v_round(const v_float64x2& a)
 { return v_int32x4(vec_mergesqo(vec_ctso(vec_rint(a.val)), vec_int4_z)); }

+inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b)
+{ return v_int32x4(vec_mergesqo(vec_ctso(vec_rint(a.val)), vec_ctso(vec_rint(b.val)))); }
+
 inline v_int32x4 v_floor(const v_float32x4& a)
 { return v_int32x4(vec_cts(vec_floor(a.val))); }

--- a/modules/core/include/opencv2/core/private.hpp
+++ b/modules/core/include/opencv2/core/private.hpp
@ -180,6 +180,8 @@ T* allocSingleton(size_t count = 1) { return static_cast<T*>(allocSingletonBuffe
 *                     Structures and macros for integration with IPP                     *
 \****************************************************************************************/

+#define OPENCV_IPP_REDUCE_SIZE 1
+
 // Temporary disabled named IPP region. Accuracy
 #define IPP_DISABLE_PYRAMIDS_UP         1 // Different results
 #define IPP_DISABLE_PYRAMIDS_DOWN       1 // Different results
--- a/modules/core/include/opencv2/core/utility.hpp
+++ b/modules/core/include/opencv2/core/utility.hpp
@ -519,6 +519,23 @@ static inline size_t divUp(size_t a, unsigned int b)
    return (a + b - 1) / b;
 }

+/** @brief Round the first value up to the nearest multiple of the second value.
+
+Use this function instead of `ceil((float)a / b) * b` expressions.
+
+@sa divUp
+*/
+static inline int roundUp(int a, unsigned int b)
+{
+    CV_DbgAssert(a >= 0);
+    return a + b - 1 - (a + b -1) % b;
+}
+/** @overload */
+static inline size_t roundUp(size_t a, unsigned int b)
+{
+    return a + b - 1 - (a + b - 1) % b;
+}
+
 /** @brief Enables or disables the optimized code.

 The function can be used to dynamically turn on and off optimized dispatched code (code that uses SSE4.2, AVX/AVX2,
--- a/modules/core/src/arithm.cpp
+++ b/modules/core/src/arithm.cpp
--- a/modules/core/src/arithm.dispatch.cpp
+++ b/modules/core/src/arithm.dispatch.cpp
@ -0,0 +1,11 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+#include "precomp.hpp"
+#include "arithm_ipp.hpp"
+#include "arithm.simd.hpp"
+#include "arithm.simd_declarations.hpp"
+
+#define ARITHM_DISPATCHING_ONLY
+#include "arithm.simd.hpp"
--- a/modules/core/src/arithm.simd.hpp
+++ b/modules/core/src/arithm.simd.hpp
--- a/modules/core/src/arithm_core.hpp
+++ b/modules/core/src/arithm_core.hpp
@ -1,629 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                          License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Copyright (C) 2015, Itseez Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_ARITHM_CORE_HPP__
-#define __OPENCV_ARITHM_CORE_HPP__
-
-#include "arithm_simd.hpp"
-
-namespace cv {
-
-template<typename T1, typename T2=T1, typename T3=T1> struct OpAdd
-{
-    typedef T1 type1;
-    typedef T2 type2;
-    typedef T3 rtype;
-    T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(a + b); }
-};
-
-template<typename T1, typename T2=T1, typename T3=T1> struct OpSub
-{
-    typedef T1 type1;
-    typedef T2 type2;
-    typedef T3 rtype;
-    T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(a - b); }
-};
-
-template<typename T1, typename T2=T1, typename T3=T1> struct OpRSub
-{
-    typedef T1 type1;
-    typedef T2 type2;
-    typedef T3 rtype;
-    T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(b - a); }
-};
-
-template<typename T> struct OpMin
-{
-    typedef T type1;
-    typedef T type2;
-    typedef T rtype;
-    T operator ()(const T a, const T b) const { return std::min(a, b); }
-};
-
-template<typename T> struct OpMax
-{
-    typedef T type1;
-    typedef T type2;
-    typedef T rtype;
-    T operator ()(const T a, const T b) const { return std::max(a, b); }
-};
-
-template<typename T> struct OpAbsDiff
-{
-    typedef T type1;
-    typedef T type2;
-    typedef T rtype;
-    T operator()(T a, T b) const { return a > b ? a - b : b - a; }
-};
-
-// specializations to prevent "-0" results
-template<> struct OpAbsDiff<float>
-{
-    typedef float type1;
-    typedef float type2;
-    typedef float rtype;
-    float operator()(float a, float b) const { return std::abs(a - b); }
-};
-template<> struct OpAbsDiff<double>
-{
-    typedef double type1;
-    typedef double type2;
-    typedef double rtype;
-    double operator()(double a, double b) const { return std::abs(a - b); }
-};
-
-template<typename T> struct OpAnd
-{
-    typedef T type1;
-    typedef T type2;
-    typedef T rtype;
-    T operator()( T a, T b ) const { return a & b; }
-};
-
-template<typename T> struct OpOr
-{
-    typedef T type1;
-    typedef T type2;
-    typedef T rtype;
-    T operator()( T a, T b ) const { return a | b; }
-};
-
-template<typename T> struct OpXor
-{
-    typedef T type1;
-    typedef T type2;
-    typedef T rtype;
-    T operator()( T a, T b ) const { return a ^ b; }
-};
-
-template<typename T> struct OpNot
-{
-    typedef T type1;
-    typedef T type2;
-    typedef T rtype;
-    T operator()( T a, T ) const { return ~a; }
-};
-
-//=============================================================================
-
-template<typename T, class Op, class VOp>
-void vBinOp(const T* src1, size_t step1, const T* src2, size_t step2, T* dst, size_t step, int width, int height)
-{
-#if CV_SSE2 || CV_NEON
-    VOp vop;
-#endif
-    Op op;
-
-    for( ; height--; src1 = (const T *)((const uchar *)src1 + step1),
-                        src2 = (const T *)((const uchar *)src2 + step2),
-                        dst = (T *)((uchar *)dst + step) )
-    {
-        int x = 0;
-
-#if CV_NEON || CV_SSE2
-#if CV_AVX2
-        if( USE_AVX2 )
-        {
-            for( ; x <= width - 32/(int)sizeof(T); x += 32/sizeof(T) )
-            {
-                typename VLoadStore256<T>::reg_type r0 = VLoadStore256<T>::load(src1 + x);
-                r0 = vop(r0, VLoadStore256<T>::load(src2 + x));
-                VLoadStore256<T>::store(dst + x, r0);
-            }
-        }
-#else
-#if CV_SSE2
-        if( USE_SSE2 )
-        {
-#endif // CV_SSE2
-            for( ; x <= width - 32/(int)sizeof(T); x += 32/sizeof(T) )
-            {
-                typename VLoadStore128<T>::reg_type r0 = VLoadStore128<T>::load(src1 + x               );
-                typename VLoadStore128<T>::reg_type r1 = VLoadStore128<T>::load(src1 + x + 16/sizeof(T));
-                r0 = vop(r0, VLoadStore128<T>::load(src2 + x               ));
-                r1 = vop(r1, VLoadStore128<T>::load(src2 + x + 16/sizeof(T)));
-                VLoadStore128<T>::store(dst + x               , r0);
-                VLoadStore128<T>::store(dst + x + 16/sizeof(T), r1);
-            }
-#if CV_SSE2
-        }
-#endif // CV_SSE2
-#endif // CV_AVX2
-#endif // CV_NEON || CV_SSE2
-
-#if CV_AVX2
-        // nothing
-#elif CV_SSE2
-        if( USE_SSE2 )
-        {
-            for( ; x <= width - 8/(int)sizeof(T); x += 8/sizeof(T) )
-            {
-                typename VLoadStore64<T>::reg_type r = VLoadStore64<T>::load(src1 + x);
-                r = vop(r, VLoadStore64<T>::load(src2 + x));
-                VLoadStore64<T>::store(dst + x, r);
-            }
-        }
-#endif
-
-#if CV_ENABLE_UNROLLED
-        for( ; x <= width - 4; x += 4 )
-        {
-            T v0 = op(src1[x], src2[x]);
-            T v1 = op(src1[x+1], src2[x+1]);
-            dst[x] = v0; dst[x+1] = v1;
-            v0 = op(src1[x+2], src2[x+2]);
-            v1 = op(src1[x+3], src2[x+3]);
-            dst[x+2] = v0; dst[x+3] = v1;
-        }
-#endif
-
-        for( ; x < width; x++ )
-            dst[x] = op(src1[x], src2[x]);
-    }
-}
-
-template<typename T, class Op, class Op32>
-void vBinOp32(const T* src1, size_t step1, const T* src2, size_t step2,
-              T* dst, size_t step, int width, int height)
-{
-#if CV_SSE2 || CV_NEON
-    Op32 op32;
-#endif
-    Op op;
-
-    for( ; height--; src1 = (const T *)((const uchar *)src1 + step1),
-                        src2 = (const T *)((const uchar *)src2 + step2),
-                        dst = (T *)((uchar *)dst + step) )
-    {
-        int x = 0;
-
-#if CV_AVX2
-        if( USE_AVX2 )
-        {
-            if( (((size_t)src1|(size_t)src2|(size_t)dst)&31) == 0 )
-            {
-                for( ; x <= width - 8; x += 8 )
-                {
-                    typename VLoadStore256Aligned<T>::reg_type r0 = VLoadStore256Aligned<T>::load(src1 + x);
-                    r0 = op32(r0, VLoadStore256Aligned<T>::load(src2 + x));
-                    VLoadStore256Aligned<T>::store(dst + x, r0);
-                }
-            }
-        }
-#elif CV_SSE2
-        if( USE_SSE2 )
-        {
-            if( (((size_t)src1|(size_t)src2|(size_t)dst)&15) == 0 )
-            {
-                for( ; x <= width - 8; x += 8 )
-                {
-                    typename VLoadStore128Aligned<T>::reg_type r0 = VLoadStore128Aligned<T>::load(src1 + x    );
-                    typename VLoadStore128Aligned<T>::reg_type r1 = VLoadStore128Aligned<T>::load(src1 + x + 4);
-                    r0 = op32(r0, VLoadStore128Aligned<T>::load(src2 + x    ));
-                    r1 = op32(r1, VLoadStore128Aligned<T>::load(src2 + x + 4));
-                    VLoadStore128Aligned<T>::store(dst + x    , r0);
-                    VLoadStore128Aligned<T>::store(dst + x + 4, r1);
-                }
-            }
-        }
-#endif // CV_AVX2
-
-#if CV_NEON || CV_SSE2
-#if CV_AVX2
-        if( USE_AVX2 )
-        {
-            for( ; x <= width - 8; x += 8 )
-            {
-                typename VLoadStore256<T>::reg_type r0 = VLoadStore256<T>::load(src1 + x);
-                r0 = op32(r0, VLoadStore256<T>::load(src2 + x));
-                VLoadStore256<T>::store(dst + x, r0);
-            }
-        }
-#else
-#if CV_SSE2
-        if( USE_SSE2 )
-        {
-#endif // CV_SSE2
-            for( ; x <= width - 8; x += 8 )
-            {
-                typename VLoadStore128<T>::reg_type r0 = VLoadStore128<T>::load(src1 + x    );
-                typename VLoadStore128<T>::reg_type r1 = VLoadStore128<T>::load(src1 + x + 4);
-                r0 = op32(r0, VLoadStore128<T>::load(src2 + x    ));
-                r1 = op32(r1, VLoadStore128<T>::load(src2 + x + 4));
-                VLoadStore128<T>::store(dst + x    , r0);
-                VLoadStore128<T>::store(dst + x + 4, r1);
-            }
-#if CV_SSE2
-        }
-#endif // CV_SSE2
-#endif // CV_AVX2
-#endif // CV_NEON || CV_SSE2
-
-#if CV_ENABLE_UNROLLED
-        for( ; x <= width - 4; x += 4 )
-        {
-            T v0 = op(src1[x], src2[x]);
-            T v1 = op(src1[x+1], src2[x+1]);
-            dst[x] = v0; dst[x+1] = v1;
-            v0 = op(src1[x+2], src2[x+2]);
-            v1 = op(src1[x+3], src2[x+3]);
-            dst[x+2] = v0; dst[x+3] = v1;
-        }
-#endif
-
-        for( ; x < width; x++ )
-            dst[x] = op(src1[x], src2[x]);
-    }
-}
-
-
-template<typename T, class Op, class Op64>
-void vBinOp64(const T* src1, size_t step1, const T* src2, size_t step2,
-               T* dst, size_t step, int width, int height)
-{
-#if CV_SSE2
-    Op64 op64;
-#endif
-    Op op;
-
-    for( ; height--; src1 = (const T *)((const uchar *)src1 + step1),
-                        src2 = (const T *)((const uchar *)src2 + step2),
-                        dst = (T *)((uchar *)dst + step) )
-    {
-        int x = 0;
-
-#if CV_AVX2
-        if( USE_AVX2 )
-        {
-            if( (((size_t)src1|(size_t)src2|(size_t)dst)&31) == 0 )
-            {
-                for( ; x <= width - 4; x += 4 )
-                {
-                    typename VLoadStore256Aligned<T>::reg_type r0 = VLoadStore256Aligned<T>::load(src1 + x);
-                    r0 = op64(r0, VLoadStore256Aligned<T>::load(src2 + x));
-                    VLoadStore256Aligned<T>::store(dst + x, r0);
-                }
-            }
-        }
-#elif CV_SSE2
-        if( USE_SSE2 )
-        {
-            if( (((size_t)src1|(size_t)src2|(size_t)dst)&15) == 0 )
-            {
-                for( ; x <= width - 4; x += 4 )
-                {
-                    typename VLoadStore128Aligned<T>::reg_type r0 = VLoadStore128Aligned<T>::load(src1 + x    );
-                    typename VLoadStore128Aligned<T>::reg_type r1 = VLoadStore128Aligned<T>::load(src1 + x + 2);
-                    r0 = op64(r0, VLoadStore128Aligned<T>::load(src2 + x    ));
-                    r1 = op64(r1, VLoadStore128Aligned<T>::load(src2 + x + 2));
-                    VLoadStore128Aligned<T>::store(dst + x    , r0);
-                    VLoadStore128Aligned<T>::store(dst + x + 2, r1);
-                }
-            }
-        }
-#endif
-
-        for( ; x <= width - 4; x += 4 )
-        {
-            T v0 = op(src1[x], src2[x]);
-            T v1 = op(src1[x+1], src2[x+1]);
-            dst[x] = v0; dst[x+1] = v1;
-            v0 = op(src1[x+2], src2[x+2]);
-            v1 = op(src1[x+3], src2[x+3]);
-            dst[x+2] = v0; dst[x+3] = v1;
-        }
-
-        for( ; x < width; x++ )
-            dst[x] = op(src1[x], src2[x]);
-    }
-}
-
-template<typename T> static void
-cmp_(const T* src1, size_t step1, const T* src2, size_t step2,
-     uchar* dst, size_t step, int width, int height, int code)
-{
-    step1 /= sizeof(src1[0]);
-    step2 /= sizeof(src2[0]);
-    if( code == CMP_GE || code == CMP_LT )
-    {
-        std::swap(src1, src2);
-        std::swap(step1, step2);
-        code = code == CMP_GE ? CMP_LE : CMP_GT;
-    }
-
-    Cmp_SIMD<T> vop(code);
-
-    if( code == CMP_GT || code == CMP_LE )
-    {
-        int m = code == CMP_GT ? 0 : 255;
-        for( ; height--; src1 += step1, src2 += step2, dst += step )
-        {
-            int x = vop(src1, src2, dst, width);
-            #if CV_ENABLE_UNROLLED
-            for( ; x <= width - 4; x += 4 )
-            {
-                int t0, t1;
-                t0 = -(src1[x] > src2[x]) ^ m;
-                t1 = -(src1[x+1] > src2[x+1]) ^ m;
-                dst[x] = (uchar)t0; dst[x+1] = (uchar)t1;
-                t0 = -(src1[x+2] > src2[x+2]) ^ m;
-                t1 = -(src1[x+3] > src2[x+3]) ^ m;
-                dst[x+2] = (uchar)t0; dst[x+3] = (uchar)t1;
-            }
-            #endif
-            for( ; x < width; x++ )
-                dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
-        }
-    }
-    else if( code == CMP_EQ || code == CMP_NE )
-    {
-        int m = code == CMP_EQ ? 0 : 255;
-        for( ; height--; src1 += step1, src2 += step2, dst += step )
-        {
-            int x = 0;
-            #if CV_ENABLE_UNROLLED
-            for( ; x <= width - 4; x += 4 )
-            {
-                int t0, t1;
-                t0 = -(src1[x] == src2[x]) ^ m;
-                t1 = -(src1[x+1] == src2[x+1]) ^ m;
-                dst[x] = (uchar)t0; dst[x+1] = (uchar)t1;
-                t0 = -(src1[x+2] == src2[x+2]) ^ m;
-                t1 = -(src1[x+3] == src2[x+3]) ^ m;
-                dst[x+2] = (uchar)t0; dst[x+3] = (uchar)t1;
-            }
-            #endif
-            for( ; x < width; x++ )
-                dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
-        }
-    }
-}
-
-template<typename T, typename WT> static void
-mul_( const T* src1, size_t step1, const T* src2, size_t step2,
-      T* dst, size_t step, int width, int height, WT scale )
-{
-    step1 /= sizeof(src1[0]);
-    step2 /= sizeof(src2[0]);
-    step /= sizeof(dst[0]);
-
-    Mul_SIMD<T, WT> vop;
-
-    if( scale == (WT)1. )
-    {
-        for( ; height--; src1 += step1, src2 += step2, dst += step )
-        {
-            int i = vop(src1, src2, dst, width, scale);
-            #if CV_ENABLE_UNROLLED
-            for(; i <= width - 4; i += 4 )
-            {
-                T t0;
-                T t1;
-                t0 = saturate_cast<T>(src1[i  ] * src2[i  ]);
-                t1 = saturate_cast<T>(src1[i+1] * src2[i+1]);
-                dst[i  ] = t0;
-                dst[i+1] = t1;
-
-                t0 = saturate_cast<T>(src1[i+2] * src2[i+2]);
-                t1 = saturate_cast<T>(src1[i+3] * src2[i+3]);
-                dst[i+2] = t0;
-                dst[i+3] = t1;
-            }
-            #endif
-            for( ; i < width; i++ )
-                dst[i] = saturate_cast<T>(src1[i] * src2[i]);
-        }
-    }
-    else
-    {
-        for( ; height--; src1 += step1, src2 += step2, dst += step )
-        {
-            int i = vop(src1, src2, dst, width, scale);
-            #if CV_ENABLE_UNROLLED
-            for(; i <= width - 4; i += 4 )
-            {
-                T t0 = saturate_cast<T>(scale*(WT)src1[i]*src2[i]);
-                T t1 = saturate_cast<T>(scale*(WT)src1[i+1]*src2[i+1]);
-                dst[i] = t0; dst[i+1] = t1;
-
-                t0 = saturate_cast<T>(scale*(WT)src1[i+2]*src2[i+2]);
-                t1 = saturate_cast<T>(scale*(WT)src1[i+3]*src2[i+3]);
-                dst[i+2] = t0; dst[i+3] = t1;
-            }
-            #endif
-            for( ; i < width; i++ )
-                dst[i] = saturate_cast<T>(scale*(WT)src1[i]*src2[i]);
-        }
-    }
-}
-
-
-template<typename T> static void
-div_i( const T* src1, size_t step1, const T* src2, size_t step2,
-      T* dst, size_t step, int width, int height, double scale )
-{
-    step1 /= sizeof(src1[0]);
-    step2 /= sizeof(src2[0]);
-    step /= sizeof(dst[0]);
-
-    Div_SIMD<T> vop;
-    float scale_f = (float)scale;
-
-    for( ; height--; src1 += step1, src2 += step2, dst += step )
-    {
-        int i = vop(src1, src2, dst, width, scale);
-        for( ; i < width; i++ )
-        {
-            T num = src1[i], denom = src2[i];
-            T v = 0;
-            if (denom != 0)
-                v = saturate_cast<T>(num*scale_f/denom);
-            dst[i] = v;
-        }
-    }
-}
-
-template<typename T> static void
-div_f( const T* src1, size_t step1, const T* src2, size_t step2,
-      T* dst, size_t step, int width, int height, double scale )
-{
-    T scale_f = (T)scale;
-    step1 /= sizeof(src1[0]);
-    step2 /= sizeof(src2[0]);
-    step /= sizeof(dst[0]);
-
-    Div_SIMD<T> vop;
-
-    for( ; height--; src1 += step1, src2 += step2, dst += step )
-    {
-        int i = vop(src1, src2, dst, width, scale);
-        for( ; i < width; i++ )
-        {
-            T num = src1[i], denom = src2[i];
-            dst[i] = saturate_cast<T>(num*scale_f/denom);
-        }
-    }
-}
-
-template<typename T> static void
-recip_i( const T* src2, size_t step2,
-         T* dst, size_t step, int width, int height, double scale )
-{
-    step2 /= sizeof(src2[0]);
-    step /= sizeof(dst[0]);
-
-    Recip_SIMD<T> vop;
-    float scale_f = (float)scale;
-
-    for( ; height--; src2 += step2, dst += step )
-    {
-        int i = vop(src2, dst, width, scale);
-        for( ; i < width; i++ )
-        {
-            T denom = src2[i];
-            T v = 0;
-            if (denom != 0)
-                v = saturate_cast<T>(scale_f/denom);
-            dst[i] = v;
-        }
-    }
-}
-
-template<typename T> static void
-recip_f( const T* src2, size_t step2,
-         T* dst, size_t step, int width, int height, double scale )
-{
-    T scale_f = (T)scale;
-    step2 /= sizeof(src2[0]);
-    step /= sizeof(dst[0]);
-
-    Recip_SIMD<T> vop;
-
-    for( ; height--; src2 += step2, dst += step )
-    {
-        int i = vop(src2, dst, width, scale);
-        for( ; i < width; i++ )
-        {
-            T denom = src2[i];
-            dst[i] = saturate_cast<T>(scale_f/denom);
-        }
-    }
-}
-
-template<typename T, typename WT> static void
-addWeighted_( const T* src1, size_t step1, const T* src2, size_t step2,
-              T* dst, size_t step, int width, int height, void* _scalars )
-{
-    const double* scalars = (const double*)_scalars;
-    WT alpha = (WT)scalars[0], beta = (WT)scalars[1], gamma = (WT)scalars[2];
-    step1 /= sizeof(src1[0]);
-    step2 /= sizeof(src2[0]);
-    step /= sizeof(dst[0]);
-
-    AddWeighted_SIMD<T, WT> vop;
-
-    for( ; height--; src1 += step1, src2 += step2, dst += step )
-    {
-        int x = vop(src1, src2, dst, width, alpha, beta, gamma);
-        #if CV_ENABLE_UNROLLED
-        for( ; x <= width - 4; x += 4 )
-        {
-            T t0 = saturate_cast<T>(src1[x]*alpha + src2[x]*beta + gamma);
-            T t1 = saturate_cast<T>(src1[x+1]*alpha + src2[x+1]*beta + gamma);
-            dst[x] = t0; dst[x+1] = t1;
-
-            t0 = saturate_cast<T>(src1[x+2]*alpha + src2[x+2]*beta + gamma);
-            t1 = saturate_cast<T>(src1[x+3]*alpha + src2[x+3]*beta + gamma);
-            dst[x+2] = t0; dst[x+3] = t1;
-        }
-        #endif
-        for( ; x < width; x++ )
-            dst[x] = saturate_cast<T>(src1[x]*alpha + src2[x]*beta + gamma);
-    }
-}
-
-} // cv::
-
-
-#endif // __OPENCV_ARITHM_CORE_HPP__
--- a/modules/core/src/arithm_ipp.hpp
+++ b/modules/core/src/arithm_ipp.hpp
@ -0,0 +1,417 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+#if ARITHM_USE_IPP
+
+namespace cv { namespace hal {
+
+//=======================================
+// Arithmetic and logical operations
+// +, -, *, /, &, |, ^, ~, abs ...
+//=======================================
+
+#define ARITHM_IPP_BIN(fun, ...) /* body of a binary-op IPP wrapper: returns 1 if `fun` succeeded, otherwise 0 */ \
+do {                                                    \
+    if (!CV_IPP_CHECK_COND) /* condition false: do not call IPP at all */ \
+        return 0;                                       \
+    if (height == 1) /* single row: all three steps are overwritten with the row size in bytes */ \
+        step1 = step2 = step = width * sizeof(dst[0]);  \
+    if (0 <= CV_INSTRUMENT_FUN_IPP(fun, __VA_ARGS__)) /* a non-negative status counts as success */ \
+    {                                                   \
+        CV_IMPL_ADD(CV_IMPL_IPP);                       \
+        return 1;                                       \
+    }                                                   \
+    setIppErrorStatus(); /* negative status: flag the error and report "not handled" */ \
+    return 0;                                           \
+} while(0)
+
+//=======================================
+// Addition
+//=======================================
+
+inline int arithm_ipp_add8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
+                            uchar* dst, size_t step, int width, int height)
+{   // forwards to ippiAdd_8u_C1RSfs with a trailing argument of 0; returns 1 if IPP succeeded, 0 otherwise
+    ARITHM_IPP_BIN(ippiAdd_8u_C1RSfs, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0);
+}
+
+inline int arithm_ipp_add16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
+                             ushort* dst, size_t step, int width, int height)
+{   // forwards to ippiAdd_16u_C1RSfs with a trailing argument of 0; returns 1 if IPP succeeded, 0 otherwise
+    ARITHM_IPP_BIN(ippiAdd_16u_C1RSfs, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0);
+}
+
+inline int arithm_ipp_add16s(const short* src1, size_t step1, const short* src2, size_t step2,
+                             short* dst, size_t step, int width, int height)
+{   // forwards to ippiAdd_16s_C1RSfs with a trailing argument of 0; returns 1 if IPP succeeded, 0 otherwise
+    ARITHM_IPP_BIN(ippiAdd_16s_C1RSfs, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0);
+}
+
+inline int arithm_ipp_add32f(const float* src1, size_t step1, const float* src2, size_t step2,
+                             float* dst, size_t step, int width, int height)
+{   // forwards to ippiAdd_32f_C1R (no trailing scale argument); returns 1 if IPP succeeded, 0 otherwise
+    ARITHM_IPP_BIN(ippiAdd_32f_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
+}
+
+#define arithm_ipp_add8s(...)  0  // no IPP wrapper for this suffix: the call evaluates to 0 ("not handled")
+#define arithm_ipp_add32s(...) 0  // same: always "not handled"
+#define arithm_ipp_add64f(...) 0  // same: always "not handled"
+
+//=======================================
+// Subtract
+//=======================================
+
+inline int arithm_ipp_sub8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
+                            uchar* dst, size_t step, int width, int height)
+{   // NOTE: src2/step2 are passed to IPP before src1/step1 - presumably ippiSub subtracts its first source from its second; confirm in the IPP reference
+    ARITHM_IPP_BIN(ippiSub_8u_C1RSfs, src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0);
+}
+
+inline int arithm_ipp_sub16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
+                             ushort* dst, size_t step, int width, int height)
+{   // forwards to ippiSub_16u_C1RSfs with the sources in swapped order; returns 1 if IPP succeeded, 0 otherwise
+    ARITHM_IPP_BIN(ippiSub_16u_C1RSfs, src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0);
+}
+
+inline int arithm_ipp_sub16s(const short* src1, size_t step1, const short* src2, size_t step2,
+                            short* dst, size_t step, int width, int height)
+{   // forwards to ippiSub_16s_C1RSfs with the sources in swapped order; returns 1 if IPP succeeded, 0 otherwise
+    ARITHM_IPP_BIN(ippiSub_16s_C1RSfs, src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0);
+}
+
+inline int arithm_ipp_sub32f(const float* src1, size_t step1, const float* src2, size_t step2,
+                            float* dst, size_t step, int width, int height)
+{   // forwards to ippiSub_32f_C1R with the sources in swapped order; returns 1 if IPP succeeded, 0 otherwise
+    ARITHM_IPP_BIN(ippiSub_32f_C1R, src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height));
+}
+
+#define arithm_ipp_sub8s(...)  0  // no IPP wrapper for this suffix: the call evaluates to 0 ("not handled")
+#define arithm_ipp_sub32s(...) 0  // same: always "not handled"
+#define arithm_ipp_sub64f(...) 0  // same: always "not handled"
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#define ARITHM_IPP_MIN_MAX(fun, type) /* row-by-row IPP wrapper body: returns 1 if `fun` succeeded on every row, otherwise 0 */ \
+do {                                                             \
+    if (!CV_IPP_CHECK_COND)                                      \
+        return 0;                                                \
+    type* s1 = (type*)src1; /* the wrappers pass const pointers; these casts drop the qualifier */ \
+    type* s2 = (type*)src2;                                      \
+    type* d  = dst;                                              \
+    if (height == 1)                                             \
+        step1 = step2 = step = width * sizeof(dst[0]);           \
+    int i = 0;                                                   \
+    for(; i < height; i++)                                       \
+    {                                                            \
+        if (0 > CV_INSTRUMENT_FUN_IPP(fun, s1, s2, d, width)) /* negative status: stop at the failing row */ \
+            break;                                               \
+        s1 = (type*)((uchar*)s1 + step1); /* steps are applied as byte offsets between rows */ \
+        s2 = (type*)((uchar*)s2 + step2);                        \
+        d  = (type*)((uchar*)d + step);                          \
+    }                                                            \
+    if (i == height) /* the loop was not interrupted, so no row failed */ \
+    {                                                            \
+        CV_IMPL_ADD(CV_IMPL_IPP);                                \
+        return 1;                                                \
+    }                                                            \
+    setIppErrorStatus();                                         \
+    return 0;                                                    \
+} while(0)
+
+//=======================================
+// Max
+//=======================================
+
+inline int arithm_ipp_max8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
+                           uchar* dst, size_t step, int width, int height)
+{   // calls ippsMaxEvery_8u once per row through ARITHM_IPP_MIN_MAX; returns 1 if all rows succeeded, 0 otherwise
+    ARITHM_IPP_MIN_MAX(ippsMaxEvery_8u, uchar);
+}
+
+inline int arithm_ipp_max16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
+                             ushort* dst, size_t step, int width, int height)
+{   // calls ippsMaxEvery_16u once per row; returns 1 if all rows succeeded, 0 otherwise
+    ARITHM_IPP_MIN_MAX(ippsMaxEvery_16u, ushort);
+}
+
+inline int arithm_ipp_max32f(const float* src1, size_t step1, const float* src2, size_t step2,
+                             float* dst, size_t step, int width, int height)
+{   // calls ippsMaxEvery_32f once per row; returns 1 if all rows succeeded, 0 otherwise
+    ARITHM_IPP_MIN_MAX(ippsMaxEvery_32f, float);
+}
+
+inline int arithm_ipp_max64f(const double* src1, size_t step1, const double* src2, size_t step2,
+                             double* dst, size_t step, int width, int height)
+{   // calls ippsMaxEvery_64f once per row; returns 1 if all rows succeeded, 0 otherwise
+    ARITHM_IPP_MIN_MAX(ippsMaxEvery_64f, double);
+}
+
+#define arithm_ipp_max8s(...)  0  // no IPP wrapper for this suffix: the call evaluates to 0 ("not handled")
+#define arithm_ipp_max16s(...) 0  // same: always "not handled"
+#define arithm_ipp_max32s(...) 0  // same: always "not handled"
+
+//=======================================
+// Min
+//=======================================
+
+inline int arithm_ipp_min8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
+                            uchar* dst, size_t step, int width, int height)
+{   // calls ippsMinEvery_8u once per row through ARITHM_IPP_MIN_MAX; returns 1 if all rows succeeded, 0 otherwise
+    ARITHM_IPP_MIN_MAX(ippsMinEvery_8u, uchar);
+}
+
+inline int arithm_ipp_min16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
+                            ushort* dst, size_t step, int width, int height)
+{   // calls ippsMinEvery_16u once per row; returns 1 if all rows succeeded, 0 otherwise
+    ARITHM_IPP_MIN_MAX(ippsMinEvery_16u, ushort);
+}
+
+inline int arithm_ipp_min32f(const float* src1, size_t step1, const float* src2,size_t step2,
+                             float* dst, size_t step, int width, int height)
+{   // calls ippsMinEvery_32f once per row; returns 1 if all rows succeeded, 0 otherwise
+    ARITHM_IPP_MIN_MAX(ippsMinEvery_32f, float);
+}
+
+inline int arithm_ipp_min64f(const double* src1, size_t step1, const double* src2, size_t step2,
+                             double* dst, size_t step, int width, int height)
+{   // calls ippsMinEvery_64f once per row; returns 1 if all rows succeeded, 0 otherwise
+    ARITHM_IPP_MIN_MAX(ippsMinEvery_64f, double);
+}
+
+#define arithm_ipp_min8s(...)  0  // no IPP wrapper for this suffix: the call evaluates to 0 ("not handled")
+#define arithm_ipp_min16s(...) 0  // same: always "not handled"
+#define arithm_ipp_min32s(...) 0  // same: always "not handled"
+
+//=======================================
+// AbsDiff
+//=======================================
+
+inline int arithm_ipp_absdiff8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
+                                uchar* dst, size_t step, int width, int height)
+{   // forwards to ippiAbsDiff_8u_C1R through ARITHM_IPP_BIN; returns 1 if IPP succeeded, 0 otherwise
+    ARITHM_IPP_BIN(ippiAbsDiff_8u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
+}
+
+inline int arithm_ipp_absdiff16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
+                                ushort* dst, size_t step, int width, int height)
+{   // forwards to ippiAbsDiff_16u_C1R; returns 1 if IPP succeeded, 0 otherwise
+    ARITHM_IPP_BIN(ippiAbsDiff_16u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
+}
+
+inline int arithm_ipp_absdiff32f(const float* src1, size_t step1, const float* src2, size_t step2,
+                                float* dst, size_t step, int width, int height)
+{   // forwards to ippiAbsDiff_32f_C1R; returns 1 if IPP succeeded, 0 otherwise
+    ARITHM_IPP_BIN(ippiAbsDiff_32f_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
+}
+#define arithm_ipp_absdiff8s(...)  0  // no IPP wrapper for this suffix: the call evaluates to 0 ("not handled")
+#define arithm_ipp_absdiff16s(...) 0  // same: always "not handled"
+#define arithm_ipp_absdiff32s(...) 0  // same: always "not handled"
+#define arithm_ipp_absdiff64f(...) 0  // same: always "not handled"
+
+//=======================================
+// Logical
+//=======================================
+
+inline int arithm_ipp_and8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
+                            uchar* dst, size_t step, int width, int height)
+{   // forwards to ippiAnd_8u_C1R through ARITHM_IPP_BIN; returns 1 if IPP succeeded, 0 otherwise
+    ARITHM_IPP_BIN(ippiAnd_8u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
+}
+
+inline int arithm_ipp_or8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
+                           uchar* dst, size_t step, int width, int height)
+{   // forwards to ippiOr_8u_C1R; returns 1 if IPP succeeded, 0 otherwise
+    ARITHM_IPP_BIN(ippiOr_8u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
+}
+
+inline int arithm_ipp_xor8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
+                            uchar* dst, size_t step, int width, int height)
+{   // forwards to ippiXor_8u_C1R; returns 1 if IPP succeeded, 0 otherwise
+    ARITHM_IPP_BIN(ippiXor_8u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
+}
+
+inline int arithm_ipp_not8u(const uchar* src1, size_t step1, uchar* dst, size_t step, int width, int height)
+{   // unary counterpart of the binary wrappers: 1 = ippiNot_8u_C1R succeeded, 0 = not handled
+    if (!CV_IPP_CHECK_COND)
+        return 0;
+    if (height == 1)  // single row: both steps become the row size in bytes
+        step1 = step = width * sizeof(dst[0]);
+    if (CV_INSTRUMENT_FUN_IPP(ippiNot_8u_C1R, src1, (int)step1, dst, (int)step, ippiSize(width, height)) < 0)
+    {
+        setIppErrorStatus();  // negative status: flag the error for the caller
+        return 0;
+    }
+    CV_IMPL_ADD(CV_IMPL_IPP);
+    return 1;
+}
+
+//=======================================
+// Compare
+//=======================================
+
+#define ARITHM_IPP_CMP(fun, ...) /* body of an IPP compare wrapper: returns 1 if `fun` succeeded, otherwise 0 */ \
+do {                                                      \
+    if (!CV_IPP_CHECK_COND)                               \
+        return 0;                                         \
+    IppCmpOp op = arithm_ipp_convert_cmp(cmpop);          \
+    if (op == (IppCmpOp)-1) /* unsupported cmpop; `op < 0` is never true when the enum's underlying type is unsigned */ \
+        return 0;                                         \
+    if (height == 1) /* single row: all three steps are overwritten with the row size of dst in bytes */ \
+        step1 = step2 = step = width * sizeof(dst[0]);    \
+    if (0 <= CV_INSTRUMENT_FUN_IPP(fun, __VA_ARGS__, op)) /* `op` is appended as the last IPP argument */ \
+    {                                                     \
+        CV_IMPL_ADD(CV_IMPL_IPP);                         \
+        return 1;                                         \
+    }                                                     \
+    setIppErrorStatus();                                  \
+    return 0;                                             \
+} while(0)
+
+inline IppCmpOp arithm_ipp_convert_cmp(int cmpop)
+{
+    // Translate an OpenCV CMP_* code into the matching IppCmpOp value.
+    // Any code that is not listed below produces the sentinel (IppCmpOp)-1,
+    // which callers use to detect an operator without an IPP counterpart.
+    if (cmpop == CMP_EQ) return ippCmpEq;
+    if (cmpop == CMP_GT) return ippCmpGreater;
+    if (cmpop == CMP_GE) return ippCmpGreaterEq;
+    if (cmpop == CMP_LT) return ippCmpLess;
+    if (cmpop == CMP_LE) return ippCmpLessEq;
+    return (IppCmpOp)-1;
+}
+
+inline int arithm_ipp_cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
+                            uchar* dst, size_t step, int width, int height, int cmpop)
+{   // forwards to ippiCompare_8u_C1R with cmpop converted by ARITHM_IPP_CMP; returns 1 if IPP succeeded, 0 otherwise
+    ARITHM_IPP_CMP(ippiCompare_8u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
+}
+
+inline int arithm_ipp_cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
+                            uchar* dst, size_t step, int width, int height, int cmpop)
+{   // ushort sources, uchar destination; forwards to ippiCompare_16u_C1R
+    ARITHM_IPP_CMP(ippiCompare_16u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
+}
+
+inline int arithm_ipp_cmp16s(const short* src1, size_t step1, const short* src2, size_t step2,
+                             uchar* dst, size_t step, int width, int height, int cmpop)
+{   // short sources, uchar destination; forwards to ippiCompare_16s_C1R
+    ARITHM_IPP_CMP(ippiCompare_16s_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
+}
+
+inline int arithm_ipp_cmp32f(const float* src1, size_t step1, const float* src2, size_t step2,
+                             uchar* dst, size_t step, int width, int height, int cmpop)
+{   // float sources, uchar destination; forwards to ippiCompare_32f_C1R
+    ARITHM_IPP_CMP(ippiCompare_32f_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
+}
+
+#define arithm_ipp_cmp8s(...)  0  // no IPP wrapper for this suffix: the call evaluates to 0 ("not handled")
+#define arithm_ipp_cmp32s(...) 0  // same: always "not handled"
+#define arithm_ipp_cmp64f(...) 0  // same: always "not handled"
+
+//=======================================
+// Multiply
+//=======================================
+
+#define ARITHM_IPP_MUL(fun, ...) /* body of an IPP multiply wrapper: calls `fun` only when scale equals 1 within FLT_EPSILON */ \
+do {                                                  \
+    if (!CV_IPP_CHECK_COND)                           \
+        return 0;                                     \
+    float fscale = (float)scale;                      \
+    if (std::fabs(fscale - 1) > FLT_EPSILON) /* any other scale is reported as "not handled" */ \
+        return 0;                                     \
+    if (0 <= CV_INSTRUMENT_FUN_IPP(fun, __VA_ARGS__)) /* a non-negative status counts as success */ \
+    {                                                 \
+        CV_IMPL_ADD(CV_IMPL_IPP);                     \
+        return 1;                                     \
+    }                                                 \
+    setIppErrorStatus();                              \
+    return 0;                                         \
+} while(0)
+
+inline int arithm_ipp_mul8u(const uchar *src1, size_t step1, const uchar *src2, size_t step2,
+                            uchar *dst, size_t step, int width, int height, double scale)
+{   // forwards to ippiMul_8u_C1RSfs (trailing argument 0) when scale is 1; returns 1 if IPP succeeded, 0 otherwise
+    ARITHM_IPP_MUL(ippiMul_8u_C1RSfs, src1, (int)step1, src2, (int)step2,dst, (int)step, ippiSize(width, height), 0);
+}
+inline int arithm_ipp_mul16u(const ushort *src1, size_t step1, const ushort *src2, size_t step2,
+                            ushort *dst, size_t step, int width, int height, double scale)
+{   // forwards to ippiMul_16u_C1RSfs (trailing argument 0) when scale is 1
+    ARITHM_IPP_MUL(ippiMul_16u_C1RSfs, src1, (int)step1, src2, (int)step2,dst, (int)step, ippiSize(width, height), 0);
+}
+
+inline int arithm_ipp_mul16s(const short *src1, size_t step1, const short *src2, size_t step2,
+                            short *dst, size_t step, int width, int height, double scale)
+{   // forwards to ippiMul_16s_C1RSfs (trailing argument 0) when scale is 1
+    ARITHM_IPP_MUL(ippiMul_16s_C1RSfs, src1, (int)step1, src2, (int)step2,dst, (int)step, ippiSize(width, height), 0);
+}
+
+inline int arithm_ipp_mul32f(const float *src1, size_t step1, const float *src2, size_t step2,
+                            float *dst, size_t step, int width, int height, double scale)
+{   // forwards to ippiMul_32f_C1R (no trailing scale argument) when scale is 1
+    ARITHM_IPP_MUL(ippiMul_32f_C1R, src1, (int)step1, src2, (int)step2,dst, (int)step, ippiSize(width, height));
+}
+
+#define arithm_ipp_mul8s(...)  0  // no IPP wrapper for this suffix: the call evaluates to 0 ("not handled")
+#define arithm_ipp_mul32s(...) 0  // same: always "not handled"
+#define arithm_ipp_mul64f(...) 0  // same: always "not handled"
+
+//=======================================
+// Div
+//=======================================
+
+#define arithm_ipp_div8u(...)  0  // division has no IPP wrapper here: every variant evaluates to 0 ("not handled")
+#define arithm_ipp_div8s(...)  0
+#define arithm_ipp_div16u(...) 0
+#define arithm_ipp_div16s(...) 0
+#define arithm_ipp_div32s(...) 0
+#define arithm_ipp_div32f(...) 0
+#define arithm_ipp_div64f(...) 0
+
+//=======================================
+// AddWeighted
+//=======================================
+
+#define arithm_ipp_addWeighted8u(...)  0  // addWeighted has no IPP wrapper here: every variant evaluates to 0
+#define arithm_ipp_addWeighted8s(...)  0
+#define arithm_ipp_addWeighted16u(...) 0
+#define arithm_ipp_addWeighted16s(...) 0
+#define arithm_ipp_addWeighted32s(...) 0
+#define arithm_ipp_addWeighted32f(...) 0
+#define arithm_ipp_addWeighted64f(...) 0
+
+//=======================================
+// Reciprocal
+//=======================================
+
+#define arithm_ipp_recip8u(...)  0  // reciprocal has no IPP wrapper here: every variant evaluates to 0
+#define arithm_ipp_recip8s(...)  0
+#define arithm_ipp_recip16u(...) 0
+#define arithm_ipp_recip16s(...) 0
+#define arithm_ipp_recip32s(...) 0
+#define arithm_ipp_recip32f(...) 0
+#define arithm_ipp_recip64f(...) 0
+
+/** empty stub block to copy when you add a new "fun"
+#define arithm_ipp_8u(...)  0
+#define arithm_ipp_8s(...)  0
+#define arithm_ipp_16u(...) 0
+#define arithm_ipp_16s(...) 0
+#define arithm_ipp_32s(...) 0
+#define arithm_ipp_32f(...) 0
+#define arithm_ipp_64f(...) 0
+**/
+
+}} // cv::hal::
+
+#define ARITHM_CALL_IPP(fun, ...) /* call IPP wrapper `fun`; if it returns non-zero, return from the enclosing void function */ \
+{                                       \
+    if (__CV_EXPAND(fun(__VA_ARGS__)))  \
+        return;                         \
+}
+
+#endif // ARITHM_USE_IPP
+
+
+#if !ARITHM_USE_IPP
+#define ARITHM_CALL_IPP(...)  // ARITHM_USE_IPP is off: the call site expands to nothing
+#endif
--- a/modules/core/src/arithm_simd.hpp
+++ b/modules/core/src/arithm_simd.hpp
--- a/modules/core/src/command_line_parser.cpp
+++ b/modules/core/src/command_line_parser.cpp
@ -119,7 +119,7 @@ static void from_str(const String& str, Param type, void* dst)

 void CommandLineParser::getByName(const String& name, bool space_delete, Param type, void* dst) const
 {
-    CV_TRY
+    try
    {
        for (size_t i = 0; i < impl->data.size(); i++)
        {
@ -144,19 +144,20 @@ void CommandLineParser::getByName(const String& name, bool space_delete, Param t
            }
        }
    }
-    CV_CATCH (Exception, e)
+    catch (const Exception& e)
    {
        impl->error = true;
        impl->error_message = impl->error_message + "Parameter '"+ name + "': " + e.err + "\n";
        return;
    }
+
    CV_Error_(Error::StsBadArg, ("undeclared key '%s' requested", name.c_str()));
 }


 void CommandLineParser::getByIndex(int index, bool space_delete, Param type, void* dst) const
 {
-    CV_TRY
+    try
    {
        for (size_t i = 0; i < impl->data.size(); i++)
        {
@ -176,12 +177,13 @@ void CommandLineParser::getByIndex(int index, bool space_delete, Param type, voi
            }
        }
    }
-    CV_CATCH(Exception, e)
+    catch (const Exception& e)
    {
        impl->error = true;
        impl->error_message = impl->error_message + format("Parameter #%d: ", index) + e.err + "\n";
        return;
    }
+
    CV_Error_(Error::StsBadArg, ("undeclared position %d requested", index));
 }

@ -455,13 +457,14 @@ std::vector<String> CommandLineParser::Impl::split_range_string(const String& _s
    std::vector<String> vec;
    String word = "";
    bool begin = false;
+
    while (!str.empty())
    {
        if (str[0] == fs)
        {
            if (begin == true)
            {
-                CV_THROW (cv::Exception(CV_StsParseError,
+                throw cv::Exception(CV_StsParseError,
                         String("error in split_range_string(")
                         + str
                         + String(", ")
@ -470,7 +473,7 @@ std::vector<String> CommandLineParser::Impl::split_range_string(const String& _s
                         + String(1, ss)
                         + String(")"),
                         "", __FILE__, __LINE__
-                         ));
+                         );
            }
            begin = true;
            word = "";
@ -481,7 +484,7 @@ std::vector<String> CommandLineParser::Impl::split_range_string(const String& _s
        {
            if (begin == false)
            {
-                CV_THROW (cv::Exception(CV_StsParseError,
+                throw cv::Exception(CV_StsParseError,
                         String("error in split_range_string(")
                         + str
                         + String(", ")
@ -490,7 +493,7 @@ std::vector<String> CommandLineParser::Impl::split_range_string(const String& _s
                         + String(1, ss)
                         + String(")"),
                         "", __FILE__, __LINE__
-                         ));
+                         );
            }
            begin = false;
            vec.push_back(word);
@ -505,7 +508,7 @@ std::vector<String> CommandLineParser::Impl::split_range_string(const String& _s

    if (begin == true)
    {
-        CV_THROW (cv::Exception(CV_StsParseError,
+        throw cv::Exception(CV_StsParseError,
                 String("error in split_range_string(")
                 + str
                 + String(", ")
@ -514,8 +517,9 @@ std::vector<String> CommandLineParser::Impl::split_range_string(const String& _s
                 + String(1, ss)
                 + String(")"),
                 "", __FILE__, __LINE__
-                ));
+                );
    }
+
    return vec;
 }

--- a/modules/core/src/convert.cpp
+++ b/modules/core/src/convert.cpp
@ -442,7 +442,6 @@ void cv::Mat::convertTo(OutputArray _dst, int _type, double alpha, double beta)
        _dst.create( dims, size, _type );
    Mat dst = _dst.getMat();

-
    BinaryFunc func = noScale ? getConvertFunc(sdepth, ddepth) : getConvertScaleFunc(sdepth, ddepth);
    double scale[] = {alpha, beta};
    int cn = channels();
@ -450,7 +449,7 @@ void cv::Mat::convertTo(OutputArray _dst, int _type, double alpha, double beta)

    if( dims <= 2 )
    {
-        Size sz = getContinuousSize(src, dst, cn);
+        Size sz = getContinuousSize2D(src, dst, cn);
        func( src.data, src.step, 0, 0, dst.data, dst.step, sz, scale );
    }
    else
@ -511,7 +510,7 @@ void cv::convertFp16( InputArray _src, OutputArray _dst )

    if( src.dims <= 2 )
    {
-        Size sz = getContinuousSize(src, dst, cn);
+        Size sz = getContinuousSize2D(src, dst, cn);
        func( src.data, src.step, 0, 0, dst.data, dst.step, sz, 0);
    }
    else
--- a/modules/core/src/convert_scale.cpp
+++ b/modules/core/src/convert_scale.cpp
@ -426,7 +426,7 @@ void cv::convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, doubl

    if( src.dims <= 2 )
    {
-        Size sz = getContinuousSize(src, dst, cn);
+        Size sz = getContinuousSize2D(src, dst, cn);
        func( src.ptr(), src.step, 0, 0, dst.ptr(), dst.step, sz, scale );
    }
    else
--- a/modules/core/src/copy.cpp
+++ b/modules/core/src/copy.cpp
@ -287,23 +287,19 @@ void Mat::copyTo( OutputArray _dst ) const

        if( rows > 0 && cols > 0 )
        {
-            // For some cases (with vector) dst.size != src.size, so force to column-based form
-            // It prevents memory corruption in case of column-based src
-            if (_dst.isVector())
-                dst = dst.reshape(0, (int)dst.total());
+            Mat src = *this;
+            Size sz = getContinuousSize2D(src, dst, (int)elemSize());
+            CV_CheckGE(sz.width, 0, "");

-            const uchar* sptr = data;
+            const uchar* sptr = src.data;
            uchar* dptr = dst.data;

 #if IPP_VERSION_X100 >= 201700
-            CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippiCopy_8u_C1R_L, sptr, (int)step, dptr, (int)dst.step, ippiSizeL((int)(cols*elemSize()), rows)) >= 0)
+            CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippiCopy_8u_C1R_L, sptr, (int)src.step, dptr, (int)dst.step, ippiSizeL(sz.width, sz.height)) >= 0)
 #endif

-            Size sz = getContinuousSize(*this, dst);
-            size_t len = sz.width*elemSize();
-
-            for( ; sz.height--; sptr += step, dptr += dst.step )
-                memcpy( dptr, sptr, len );
+            for (; sz.height--; sptr += src.step, dptr += dst.step)
+                memcpy(dptr, sptr, sz.width);
        }
        return;
    }
@ -403,8 +399,9 @@ void Mat::copyTo( OutputArray _dst, InputArray _mask ) const

    if( dims <= 2 )
    {
-        Size sz = getContinuousSize(*this, dst, mask, mcn);
-        copymask(data, step, mask.data, mask.step, dst.data, dst.step, sz, &esz);
+        Mat src = *this;
+        Size sz = getContinuousSize2D(src, dst, mask, mcn);
+        copymask(src.data, src.step, mask.data, mask.step, dst.data, dst.step, sz, &esz);
        return;
    }

--- a/modules/core/src/glob.cpp
+++ b/modules/core/src/glob.cpp
@ -231,7 +231,7 @@ static void glob_rec(const cv::String& directory, const cv::String& wildchart, s
    if ((dir = opendir (directory.c_str())) != 0)
    {
        /* find all the files and directories within directory */
-        CV_TRY
+        try
        {
            struct dirent *ent;
            while ((ent = readdir (dir)) != 0)
@ -255,10 +255,10 @@ static void glob_rec(const cv::String& directory, const cv::String& wildchart, s
                    result.push_back(entry);
            }
        }
-        CV_CATCH_ALL
+        catch (...)
        {
            closedir(dir);
-            CV_RETHROW();
+            throw;
        }
        closedir(dir);
    }
--- a/modules/core/src/lda.cpp
+++ b/modules/core/src/lda.cpp
@ -866,7 +866,7 @@ private:
        d = alloc_1d<double> (n);
        e = alloc_1d<double> (n);
        ort = alloc_1d<double> (n);
-        CV_TRY {
+        try {
            // Reduce to Hessenberg form.
            orthes();
            // Reduce Hessenberg to real Schur form.
@ -884,10 +884,10 @@ private:
            // Deallocate the memory by releasing all internal working data.
            release();
        }
-        CV_CATCH_ALL
+        catch (...)
        {
            release();
-            CV_RETHROW();
+            throw;
        }
    }

--- a/modules/core/src/lut.cpp
+++ b/modules/core/src/lut.cpp
@ -120,11 +120,11 @@ static bool openvx_LUT(Mat src, Mat dst, Mat _lut)
        lut.copyFrom(_lut);
        ivx::IVX_CHECK_STATUS(vxuTableLookup(ctx, ia, lut, ib));
    }
-    catch (ivx::RuntimeError & e)
+    catch (const ivx::RuntimeError& e)
    {
        VX_DbgThrow(e.what());
    }
-    catch (ivx::WrapperError & e)
+    catch (const ivx::WrapperError& e)
    {
        VX_DbgThrow(e.what());
    }
--- a/modules/core/src/mathfuncs.cpp
+++ b/modules/core/src/mathfuncs.cpp
@ -1489,7 +1489,7 @@ bool checkRange(InputArray _src, bool quiet, Point* pt, double minVal, double ma
    {
        int i, loc = 0;
        int cn = src.channels();
-        Size size = getContinuousSize( src, cn );
+        Size size = getContinuousSize2D(src, cn);

        if( depth == CV_32F )
        {
--- a/modules/core/src/matrix.cpp
+++ b/modules/core/src/matrix.cpp
@ -416,7 +416,7 @@ Mat::Mat(const Mat& m, const Range& _rowRange, const Range& _colRange)
    }

    *this = m;
-    CV_TRY
+    try
    {
        if( _rowRange != Range::all() && _rowRange != Range(0,rows) )
        {
@ -436,10 +436,10 @@ Mat::Mat(const Mat& m, const Range& _rowRange, const Range& _colRange)
            flags |= SUBMATRIX_FLAG;
        }
    }
-    CV_CATCH_ALL
+    catch(...)
    {
        release();
-        CV_RETHROW();
+        throw;
    }

    updateContinuityFlag();
@ -943,4 +943,77 @@ int Mat::checkVector(int _elemChannels, int _depth, bool _requireContinuous) con
    ? (int)(total()*channels()/_elemChannels) : -1;
 }

+
+static inline Size getContinuousSize_(int flags, int cols, int rows, int widthScale)
+{
+    // A continuous matrix is reported as one long row, unless that row length would not fit in int.
+    const int64 flatWidth = (int64)cols * rows * widthScale;
+    const bool continuous = (flags & Mat::CONTINUOUS_FLAG) != 0;
+    if (continuous && flatWidth < INT_MAX)
+        return Size((int)flatWidth, 1);
+    return Size(cols * widthScale, rows);
+}
+
+Size getContinuousSize2D(Mat& m1, int widthScale)
+{
+    CV_CheckLE(m1.dims, 2, "");  // m1.dims is checked against 2 before cols/rows are used
+    const int matFlags = m1.flags, width = m1.cols, height = m1.rows;
+    return getContinuousSize_(matFlags, width, height, widthScale);
+}
+Size getContinuousSize2D(Mat& m1, Mat& m2, int widthScale)
+{   // Common (width, height) for processing m1 and m2 together; may replace both arguments with reshaped headers.
+    CV_CheckLE(m1.dims, 2, "");
+    CV_CheckLE(m2.dims, 2, "");
+    const Size sz1 = m1.size();
+    if (sz1 != m2.size())  // reshape all matrices to the same size (#4159)
+    {
+        size_t total_sz = m1.total();
+        CV_CheckEQ(total_sz, m2.total(), "");  // element counts must match even though the shapes differ
+        bool is_m1_vector = m1.cols == 1 || m1.rows == 1;
+        bool is_m2_vector = m2.cols == 1 || m2.rows == 1;
+        CV_Assert(is_m1_vector); CV_Assert(is_m2_vector);  // only single-row / single-column inputs may differ in shape
+        int total = (int)total_sz;  // vector-column
+        bool isContiguous = ((m1.flags & m2.flags) & Mat::CONTINUOUS_FLAG) != 0;  // both must carry the flag
+        bool has_int_overflow = ((int64)total_sz * widthScale) >= INT_MAX;
+        if (isContiguous && !has_int_overflow)
+            total = 1; // vector-row
+        m1 = m1.reshape(0, total);  // `total` is the new row count: 1 for a row vector, total_sz for a column
+        m2 = m2.reshape(0, total);
+        CV_Assert(m1.cols == m2.cols && m1.rows == m2.rows);
+        return Size(m1.cols * widthScale, m1.rows);
+    }
+    return getContinuousSize_(m1.flags & m2.flags,  // same shapes: defer to the flag-based helper
+                              m1.cols, m1.rows, widthScale);
+}
+
+Size getContinuousSize2D(Mat& m1, Mat& m2, Mat& m3, int widthScale)
+{   // Three-matrix variant: common (width, height) for m1, m2 and m3; may replace all three with reshaped headers.
+    CV_CheckLE(m1.dims, 2, "");
+    CV_CheckLE(m2.dims, 2, "");
+    CV_CheckLE(m3.dims, 2, "");
+    const Size sz1 = m1.size();
+    if (sz1 != m2.size() || sz1 != m3.size())  // reshape all matrices to the same size (#4159)
+    {
+        size_t total_sz = m1.total();
+        CV_CheckEQ(total_sz, m2.total(), "");  // element counts must match even though the shapes differ
+        CV_CheckEQ(total_sz, m3.total(), "");
+        bool is_m1_vector = m1.cols == 1 || m1.rows == 1;
+        bool is_m2_vector = m2.cols == 1 || m2.rows == 1;
+        bool is_m3_vector = m3.cols == 1 || m3.rows == 1;
+        CV_Assert(is_m1_vector); CV_Assert(is_m2_vector); CV_Assert(is_m3_vector);  // only vectors may differ in shape
+        int total = (int)total_sz;  // vector-column
+        bool isContiguous = ((m1.flags & m2.flags & m3.flags) & Mat::CONTINUOUS_FLAG) != 0;  // all three must carry the flag
+        bool has_int_overflow = ((int64)total_sz * widthScale) >= INT_MAX;
+        if (isContiguous && !has_int_overflow)
+            total = 1; // vector-row
+        m1 = m1.reshape(0, total);  // `total` is the new row count: 1 for a row vector, total_sz for a column
+        m2 = m2.reshape(0, total);
+        m3 = m3.reshape(0, total);
+        CV_Assert(m1.cols == m2.cols && m1.rows == m2.rows && m1.cols == m3.cols && m1.rows == m3.rows);
+        return Size(m1.cols * widthScale, m1.rows);
+    }
+    return getContinuousSize_(m1.flags & m2.flags & m3.flags,  // same shapes: defer to the flag-based helper
+                              m1.cols, m1.rows, widthScale);
+}
+
 } // cv::
--- a/modules/core/src/mean.cpp
+++ b/modules/core/src/mean.cpp
@ -654,11 +654,11 @@ static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv
                    pstddev[c] = 0;
            }
        }
-        catch (ivx::RuntimeError & e)
+        catch (const ivx::RuntimeError & e)
        {
            VX_DbgThrow(e.what());
        }
-        catch (ivx::WrapperError & e)
+        catch (const ivx::WrapperError & e)
        {
            VX_DbgThrow(e.what());
        }
--- a/modules/core/src/minmax.cpp
+++ b/modules/core/src/minmax.cpp
@ -439,11 +439,11 @@ static bool openvx_minMaxIdx(Mat &src, double* minVal, double* maxVal, int* minI
            ofs2idx(src, maxidx, maxIdx);
        }
    }
-    catch (ivx::RuntimeError & e)
+    catch (const ivx::RuntimeError & e)
    {
        VX_DbgThrow(e.what());
    }
-    catch (ivx::WrapperError & e)
+    catch (const ivx::WrapperError & e)
    {
        VX_DbgThrow(e.what());
    }
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@ -894,11 +894,11 @@ bool useOpenCL()
    CoreTLSData* data = getCoreTlsData().get();
    if( data->useOpenCL < 0 )
    {
-        CV_TRY
+        try
        {
            data->useOpenCL = (int)(haveOpenCL() && Device::getDefault().ptr() && Device::getDefault().available()) ? 1 : 0;
        }
-        CV_CATCH_ALL
+        catch (...)
        {
            data->useOpenCL = 0;
        }
--- a/modules/core/src/precomp.hpp
+++ b/modules/core/src/precomp.hpp
@ -86,7 +86,6 @@
 #include "opencv2/core/sse_utils.hpp"
 #include "opencv2/core/neon_utils.hpp"
 #include "opencv2/core/vsx_utils.hpp"
-#include "arithm_core.hpp"
 #include "hal_replacement.hpp"

 #define GET_OPTIMIZED(func) (func)
@ -106,6 +105,102 @@ extern const uchar g_Saturate8u[];
 #define CV_MIN_8U(a,b)       ((a) - CV_FAST_CAST_8U((a) - (b)))
 #define CV_MAX_8U(a,b)       ((a) + CV_FAST_CAST_8U((b) - (a)))

+template<typename T1, typename T2=T1, typename T3=T1> struct OpAdd // binary functor: a + b, narrowed to T3 via saturate_cast
+{
+    typedef T1 type1; // first operand type
+    typedef T2 type2; // second operand type (defaults to T1)
+    typedef T3 rtype; // result type (defaults to T1)
+    T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(a + b); }
+};
+
+template<typename T1, typename T2=T1, typename T3=T1> struct OpSub // binary functor: a - b, narrowed to T3 via saturate_cast
+{
+    typedef T1 type1; // first operand type
+    typedef T2 type2; // second operand type (defaults to T1)
+    typedef T3 rtype; // result type (defaults to T1)
+    T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(a - b); }
+};
+
+template<typename T1, typename T2=T1, typename T3=T1> struct OpRSub // reversed subtraction functor: b - a, narrowed to T3 via saturate_cast
+{
+    typedef T1 type1; // first operand type
+    typedef T2 type2; // second operand type (defaults to T1)
+    typedef T3 rtype; // result type (defaults to T1)
+    T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(b - a); } // note the swapped operand order
+};
+
+template<typename T> struct OpMin // binary functor: smaller of the two operands
+{
+    typedef T type1; // operands and result all share the type T
+    typedef T type2;
+    typedef T rtype;
+    T operator ()(const T a, const T b) const { return std::min(a, b); }
+};
+
+template<typename T> struct OpMax // binary functor: larger of the two operands
+{
+    typedef T type1; // operands and result all share the type T
+    typedef T type2;
+    typedef T rtype;
+    T operator ()(const T a, const T b) const { return std::max(a, b); }
+};
+
+template<typename T> struct OpAbsDiff // binary functor: absolute difference of the two operands
+{
+    typedef T type1; // operands and result all share the type T
+    typedef T type2;
+    typedef T rtype;
+    T operator()(T a, T b) const { return a > b ? a - b : b - a; } // always subtracts the smaller operand from the larger one
+};
+
+// specializations to prevent "-0" results
+template<> struct OpAbsDiff<float> // float variant: takes std::abs of the difference instead of branching
+{
+    typedef float type1;
+    typedef float type2;
+    typedef float rtype;
+    float operator()(float a, float b) const { return std::abs(a - b); }
+};
+template<> struct OpAbsDiff<double> // double variant: takes std::abs of the difference instead of branching
+{
+    typedef double type1;
+    typedef double type2;
+    typedef double rtype;
+    double operator()(double a, double b) const { return std::abs(a - b); }
+};
+
+template<typename T> struct OpAnd // binary functor: bitwise AND
+{
+    typedef T type1; // operands and result all share the type T
+    typedef T type2;
+    typedef T rtype;
+    T operator()( T a, T b ) const { return a & b; }
+};
+
+template<typename T> struct OpOr // binary functor: bitwise OR
+{
+    typedef T type1; // operands and result all share the type T
+    typedef T type2;
+    typedef T rtype;
+    T operator()( T a, T b ) const { return a | b; }
+};
+
+template<typename T> struct OpXor // binary functor: bitwise XOR
+{
+    typedef T type1; // operands and result all share the type T
+    typedef T type2;
+    typedef T rtype;
+    T operator()( T a, T b ) const { return a ^ b; }
+};
+
+template<typename T> struct OpNot // bitwise NOT exposed with the same two-argument call shape as the other functors
+{
+    typedef T type1; // operands and result all share the type T
+    typedef T type2;
+    typedef T rtype;
+    T operator()( T a, T ) const { return ~a; } // the second argument is accepted but ignored
+};
+
 template<> inline uchar OpAdd<uchar>::operator ()(uchar a, uchar b) const
 { return CV_FAST_CAST_8U(a + b); }

@ -149,47 +244,12 @@ BinaryFunc getCopyMaskFunc(size_t esz);
 /* maximal average node_count/hash_size ratio beyond which hash table is resized */
 #define  CV_SPARSE_HASH_RATIO    3

-inline Size getContinuousSize_( int flags, int cols, int rows, int widthScale )
-{
-    int64 sz = (int64)cols * rows * widthScale;
-    return (flags & Mat::CONTINUOUS_FLAG) != 0 &&
-        (int)sz == sz ? Size((int)sz, 1) : Size(cols * widthScale, rows);
-}
-
-inline Size getContinuousSize( const Mat& m1, int widthScale=1 )
-{
-    return getContinuousSize_(m1.flags,
-                              m1.cols, m1.rows, widthScale);
-}
-
-inline Size getContinuousSize( const Mat& m1, const Mat& m2, int widthScale=1 )
-{
-    return getContinuousSize_(m1.flags & m2.flags,
-                              m1.cols, m1.rows, widthScale);
-}
-
-inline Size getContinuousSize( const Mat& m1, const Mat& m2,
-                               const Mat& m3, int widthScale=1 )
-{
-    return getContinuousSize_(m1.flags & m2.flags & m3.flags,
-                              m1.cols, m1.rows, widthScale);
-}
-
-inline Size getContinuousSize( const Mat& m1, const Mat& m2,
-                               const Mat& m3, const Mat& m4,
-                               int widthScale=1 )
-{
-    return getContinuousSize_(m1.flags & m2.flags & m3.flags & m4.flags,
-                              m1.cols, m1.rows, widthScale);
-}
-
-inline Size getContinuousSize( const Mat& m1, const Mat& m2,
-                               const Mat& m3, const Mat& m4,
-                               const Mat& m5, int widthScale=1 )
-{
-    return getContinuousSize_(m1.flags & m2.flags & m3.flags & m4.flags & m5.flags,
-                              m1.cols, m1.rows, widthScale);
-}
+// Vectors are represented inconsistently across the code base:
+// both column vectors and row vectors are used, always with dims=2 (as a 2D Mat).
+// Reshapes the matrices if necessary (in case of vectors) and returns the size with scaled width.
+Size getContinuousSize2D(Mat& m1, int widthScale=1);
+Size getContinuousSize2D(Mat& m1, Mat& m2, int widthScale=1);
+Size getContinuousSize2D(Mat& m1, Mat& m2, Mat& m3, int widthScale=1);

 void setSize( Mat& m, int _dims, const int* _sz, const size_t* _steps, bool autoSteps=false );
 void finalizeHdr(Mat& m);
--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@ -1029,7 +1029,7 @@ void error( const Exception& exc )
        *p = 0;
    }

-    CV_THROW(exc);
+    throw exc;
 #ifdef __GNUC__
 # if !defined __clang__ && !defined __APPLE__
    // this suppresses this warning: "noreturn" function does return [enabled by default]
--- a/modules/core/src/umatrix.cpp
+++ b/modules/core/src/umatrix.cpp
@ -367,11 +367,11 @@ UMat Mat::getUMat(AccessFlag accessFlags, UMatUsageFlags usageFlags) const
        new_u->originalUMatData = u;
    }
    bool allocated = false;
-    CV_TRY
+    try
    {
        allocated = UMat::getStdAllocator()->allocate(new_u, accessFlags, usageFlags);
    }
-    CV_CATCH(cv::Exception, e)
+    catch (const cv::Exception& e)
    {
        fprintf(stderr, "Exception: %s\n", e.what());
    }
@ -442,12 +442,12 @@ void UMat::create(int d, const int* _sizes, int _type, UMatUsageFlags _usageFlag
            a = a0;
            a0 = Mat::getDefaultAllocator();
        }
-        CV_TRY
+        try
        {
            u = a->allocate(dims, size, _type, 0, step.p, ACCESS_RW /* ignored */, usageFlags);
            CV_Assert(u != 0);
        }
-        CV_CATCH_ALL
+        catch(...)
        {
            if(a != a0)
                u = a0->allocate(dims, size, _type, 0, step.p, ACCESS_RW /* ignored */, usageFlags);
--- a/modules/core/test/test_intrin_utils.hpp
+++ b/modules/core/test/test_intrin_utils.hpp
@ -119,11 +119,15 @@ template <typename R> struct Data
            d[i] += (LaneType)m;
        return *this;
    }
-    void fill(LaneType val)
+    void fill(LaneType val, int s, int c = R::nlanes) // sets lanes in the half-open range [s, c) to val
    {
-        for (int i = 0; i < R::nlanes; ++i)
+        for (int i = s; i < c; ++i)
            d[i] = val;
    }
+    void fill(LaneType val) // sets every lane to val
+    {
+        fill(val, 0); // end index falls back to its default, R::nlanes
+    }
    void reverse()
    {
        for (int i = 0; i < R::nlanes / 2; ++i)
@ -739,6 +743,23 @@ template<typename R> struct TheTest
        return *this;
    }

+    TheTest & test_absdiffs() // compares v_absdiffs() lane by lane with a saturated scalar |a - b|
+    {
+        Data<R> dataA(std::numeric_limits<LaneType>::max()),
+                dataB(std::numeric_limits<LaneType>::min());
+        dataA[0] = (LaneType)-1; // lanes 0 and 1 are overridden with small values: (-1, 1) and (2, -2)
+        dataB[0] = 1;
+        dataA[1] = 2;
+        dataB[1] = (LaneType)-2;
+        R a = dataA, b = dataB;
+        Data<R> resC = v_absdiffs(a, b);
+        for (int i = 0; i < R::nlanes; ++i)
+        {
+            EXPECT_EQ(saturate_cast<LaneType>(std::abs(dataA[i] - dataB[i])), resC[i]); // reference value is narrowed to LaneType by saturate_cast
+        }
+        return *this; // returned so that test calls can be chained
+    }
+
    TheTest & test_reduce()
    {
        Data<R> dataA;
@ -874,6 +895,81 @@ template<typename R> struct TheTest
        return *this;
    }

+    // v_uint8 only
+    TheTest & test_pack_b() // exercises the 2-, 4- and 8-input forms of v_pack_b on byte masks
+    {
+        // 16-bit
+        Data<R> dataA, dataB;
+        dataB.fill(0, R::nlanes / 2); // zero the upper half of dataB's lanes
+
+        R a = dataA, b = dataB;
+        Data<R> maskA = a == b, maskB = a != b; // complementary comparison masks
+
+        a = maskA; b = maskB; // the masks themselves become the inputs to pack
+        Data<R> res  = v_pack_b(v_reinterpret_as_u16(a), v_reinterpret_as_u16(b));
+        for (int i = 0; i < v_uint16::nlanes; ++i)
+        {
+            SCOPED_TRACE(cv::format("i=%d", i));
+            EXPECT_EQ(maskA[i * 2], res[i]); // output byte i is expected to equal byte 2*i of the first input
+            EXPECT_EQ(maskB[i * 2], res[i + v_uint16::nlanes]); // second input fills the next group of output bytes
+        }
+
+        // 32-bit
+        Data<R> dataC, dataD;
+        dataD.fill(0, R::nlanes / 2);
+
+        R c = dataC, d = dataD;
+        Data<R> maskC = c == d, maskD = c != d;
+
+        c = maskC; d = maskD;
+        res = v_pack_b
+        (
+            v_reinterpret_as_u32(a), v_reinterpret_as_u32(b), // a and b still hold maskA / maskB from the 16-bit part
+            v_reinterpret_as_u32(c), v_reinterpret_as_u32(d)
+        );
+
+        for (int i = 0; i < v_uint32::nlanes; ++i)
+        {
+            SCOPED_TRACE(cv::format("i=%d", i));
+            EXPECT_EQ(maskA[i * 4], res[i]); // output byte i is expected to equal byte 4*i of the matching input
+            EXPECT_EQ(maskB[i * 4], res[i + v_uint32::nlanes]);
+            EXPECT_EQ(maskC[i * 4], res[i + v_uint32::nlanes * 2]);
+            EXPECT_EQ(maskD[i * 4], res[i + v_uint32::nlanes * 3]);
+        }
+
+        // 64-bit
+        Data<R> dataE, dataF, dataG(0), dataH(0xFF); // G and H are constructed from constants and passed to pack unmasked
+        dataF.fill(0, R::nlanes / 2);
+
+        R e = dataE, f = dataF, g = dataG, h = dataH;
+        Data<R> maskE = e == f, maskF = e != f;
+
+        e = maskE; f = maskF;
+        res = v_pack_b
+        (
+            v_reinterpret_as_u64(a), v_reinterpret_as_u64(b),
+            v_reinterpret_as_u64(c), v_reinterpret_as_u64(d),
+            v_reinterpret_as_u64(e), v_reinterpret_as_u64(f),
+            v_reinterpret_as_u64(g), v_reinterpret_as_u64(h)
+        );
+
+        for (int i = 0; i < v_uint64::nlanes; ++i)
+        {
+            SCOPED_TRACE(cv::format("i=%d", i));
+            EXPECT_EQ(maskA[i * 8], res[i]); // output byte i is expected to equal byte 8*i of the matching input
+            EXPECT_EQ(maskB[i * 8], res[i + v_uint64::nlanes]);
+            EXPECT_EQ(maskC[i * 8], res[i + v_uint64::nlanes * 2]);
+            EXPECT_EQ(maskD[i * 8], res[i + v_uint64::nlanes * 3]);
+
+            EXPECT_EQ(maskE[i * 8], res[i + v_uint64::nlanes * 4]);
+            EXPECT_EQ(maskF[i * 8], res[i + v_uint64::nlanes * 5]);
+            EXPECT_EQ(dataG[i * 8], res[i + v_uint64::nlanes * 6]);
+            EXPECT_EQ(dataH[i * 8], res[i + v_uint64::nlanes * 7]);
+        }
+
+        return *this; // returned so that test calls can be chained
+    }
+
    TheTest & test_unpack()
    {
        Data<R> dataA, dataB;
@ -1228,6 +1324,7 @@ void test_hal_intrin_uint8()
        .test_popcount()
        .test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
        .test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>()
+        .test_pack_b()
        .test_unpack()
        .test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
        .test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
@ -1259,6 +1356,7 @@ void test_hal_intrin_int8()
        .test_logic()
        .test_min_max()
        .test_absdiff()
+        .test_absdiffs()
        .test_abs()
        .test_mask()
        .test_popcount()
@ -1317,6 +1415,7 @@ void test_hal_intrin_int16()
        .test_logic()
        .test_min_max()
        .test_absdiff()
+        .test_absdiffs()
        .test_abs()
        .test_reduce()
        .test_mask()
--- a/modules/core/test/test_mat.cpp
+++ b/modules/core/test/test_mat.cpp
@ -1930,5 +1930,36 @@ TEST(Core_InputArray, support_CustomType)
    }
 }

+TEST(Core_Vectors, issue_13078) // convertTo() from a strided 4x1 Mat straight into a std::vector<int>
+{
+    float floats_[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
+    std::vector<float> floats(floats_, floats_ + 8);
+    std::vector<int> ints(4);
+
+    Mat m(4, 1, CV_32FC1, floats.data(), sizeof(floats[0]) * 2); // row step of two floats: each row starts at every second element
+
+    m.convertTo(ints, CV_32S);
+
+    ASSERT_EQ(1, ints[0]); // expected values are elements 0, 2, 4 and 6 of the source buffer
+    ASSERT_EQ(3, ints[1]);
+    ASSERT_EQ(5, ints[2]);
+    ASSERT_EQ(7, ints[3]);
+}
+
+TEST(Core_Vectors, issue_13078_workaround) // same scenario, but the destination vector is wrapped in a Mat header first
+{
+    float floats_[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
+    std::vector<float> floats(floats_, floats_ + 8);
+    std::vector<int> ints(4);
+
+    Mat m(4, 1, CV_32FC1, floats.data(), sizeof(floats[0]) * 2); // row step of two floats: each row starts at every second element
+
+    m.convertTo(Mat(ints), CV_32S); // destination passed as Mat(ints) instead of the vector itself
+
+    ASSERT_EQ(1, ints[0]); // expected values are elements 0, 2, 4 and 6 of the source buffer
+    ASSERT_EQ(3, ints[1]);
+    ASSERT_EQ(5, ints[2]);
+    ASSERT_EQ(7, ints[3]);
+}

 }} // namespace
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@ -1891,44 +1891,46 @@ struct Net::Impl
                }

                // fuse convolution layer followed by eltwise + relu
-                if ( IS_DNN_OPENCL_TARGET(preferableTarget) )
+                if ( IS_DNN_OPENCL_TARGET(preferableTarget) && ld.layerInstance->type == "Convolution" )
                {
                    Ptr<EltwiseLayer> nextEltwiseLayer;
                    if( nextData )
                        nextEltwiseLayer = nextData->layerInstance.dynamicCast<EltwiseLayer>();

-                    if( !nextEltwiseLayer.empty() && pinsToKeep.count(lpNext) == 0 )
+                    if( !nextEltwiseLayer.empty() && pinsToKeep.count(lpNext) == 0 &&
+                        nextData->inputBlobsId.size() == 2 )
                    {
                        LayerData *eltwiseData = nextData;
-                        // go down from the second input and find the first non-skipped layer.
-                        LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[1].lid];
+
+                        // Eltwise layer has two inputs. We need to determine which
+                        // is a base convolution layer and which could be used as its bias.
+                        LayerData* biasLayerData = 0;
+                        for (int i = 0; i < 2; ++i)
+                        {
+                            LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[i].lid];
                            CV_Assert(downLayerData);
                            while (downLayerData->skip)
                            {
+                                if (downLayerData->inputBlobsId.size() == 1)
                                    downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
-                        }
-                        CV_Assert(downLayerData);
-
-                        // second input layer is current layer.
-                        if ( ld.id == downLayerData->id )
+                                else
                                {
-                            // go down from the first input and find the first non-skipped layer
-                            downLayerData = &layers[eltwiseData->inputBlobsId[0].lid];
-                            while (downLayerData->skip)
+                                    downLayerData = 0;
+                                    break;
+                                }
+                            }
+                            if (downLayerData && ld.id == downLayerData->id)
                            {
-                                if ( !downLayerData->type.compare("Eltwise") )
-                                    downLayerData = &layers[downLayerData->inputBlobsId[1].lid];
-                                else
-                                    downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
+                                biasLayerData = &layers[eltwiseData->inputBlobsId[1 - i].lid];
+                                break;
                            }
-
-                            Ptr<ConvolutionLayer> convLayer = downLayerData->layerInstance.dynamicCast<ConvolutionLayer>();
-
-                            //  first input layer is convolution layer
-                            if( !convLayer.empty() && eltwiseData->consumers.size() == 1 )
+                        }
+                        CV_Assert(biasLayerData);
+                        {
+                            if( eltwiseData->consumers.size() == 1 )
                            {
                                // fuse eltwise + activation layer
-                                LayerData *firstConvLayerData = downLayerData;
+                                if (biasLayerData->id < ld.id)
                                {
                                    nextData = &layers[eltwiseData->consumers[0].lid];
                                    lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
@ -1942,8 +1944,8 @@ struct Net::Impl
                                             !nextData->type.compare("Power")) &&
                                            currLayer->setActivation(nextActivLayer) )
                                    {
-                                        CV_Assert(firstConvLayerData->outputBlobsWrappers.size() == 1 && ld.inputBlobsWrappers.size() == 1);
-                                        ld.inputBlobsWrappers.push_back(firstConvLayerData->outputBlobsWrappers[0]);
+                                        CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1);
+                                        ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]);
                                        printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
                                        printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
                                        eltwiseData->skip = true;
@ -1994,9 +1996,6 @@ struct Net::Impl
                }
            }

-            if (preferableBackend != DNN_BACKEND_OPENCV)
-                continue;  // Go to the next layer.
-
            // the optimization #2. if there is no layer that takes max pooling layer's computed
            // max indices (and only some semantical segmentation networks might need this;
            // many others only take the maximum values), then we switch the max pooling
@ -3184,7 +3183,7 @@ void Net::setHalideScheduler(const String& scheduler)
 int64 Net::getPerfProfile(std::vector<double>& timings)
 {
    timings = std::vector<double>(impl->layersTimings.begin() + 1, impl->layersTimings.end());
-    int64 total = std::accumulate(timings.begin(), timings.end(), 0);
+    int64 total = (int64)std::accumulate(timings.begin(), timings.end(), 0.0); // double seed: with an int seed accumulate would truncate every partial sum
    return total;
 }

--- a/modules/dnn/src/layers/pooling_layer.cpp
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@ -96,7 +96,6 @@ public:
        else if (params.has("pooled_w") || params.has("pooled_h"))
        {
            type = ROI;
-            computeMaxIdx = false;
            pooledSize.width = params.get<uint32_t>("pooled_w", 1);
            pooledSize.height = params.get<uint32_t>("pooled_h", 1);
        }
@ -142,6 +141,7 @@ public:
 #ifdef HAVE_OPENCL
        poolOp.release();
 #endif
+        computeMaxIdx = type == MAX;
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
@ -193,19 +193,14 @@ public:
            poolOp = Ptr<OCL4DNNPool<float> >(new OCL4DNNPool<float>(config));
        }

-        for (size_t ii = 0; ii < inputs.size(); ii++)
-        {
-            UMat& inpMat = inputs[ii];
-            int out_index = (type == MAX) ? 2 : 1;
-            UMat& outMat = outputs[out_index * ii];
-            UMat maskMat = (type == MAX) ? outputs[2 * ii + 1] : UMat();
+        CV_Assert_N(inputs.size() == 1, !outputs.empty(), !computeMaxIdx || outputs.size() == 2);
+        UMat& inpMat = inputs[0];
+        UMat& outMat = outputs[0];
+        UMat maskMat = computeMaxIdx ? outputs[1] : UMat();

        CV_Assert(inpMat.offset == 0 && outMat.offset == 0);

-            if (!poolOp->Forward(inpMat, outMat, maskMat))
-                return false;
-        }
-        return true;
+        return poolOp->Forward(inpMat, outMat, maskMat);
    }
 #endif

@ -232,9 +227,12 @@ public:
        switch (type)
        {
            case MAX:
-                CV_Assert_N(inputs.size() == 1, outputs.size() == 2);
-                maxPooling(inputs[0], outputs[0], outputs[1]);
+            {
+                CV_Assert_N(inputs.size() == 1, !computeMaxIdx || outputs.size() == 2);
+                Mat mask = computeMaxIdx ? outputs[1] : Mat();
+                maxPooling(inputs[0], outputs[0], mask);
                break;
+            }
            case AVE:
                CV_Assert_N(inputs.size() == 1, outputs.size() == 1);
                avePooling(inputs[0], outputs[0]);
@ -951,7 +949,10 @@ public:
            dims[0] = inputs[1][0];  // Number of proposals;
            dims[1] = psRoiOutChannels;
        }
-        outputs.assign(type == MAX ? 2 : 1, shape(dims, 4));
+
+        int numOutputs = requiredOutputs ? requiredOutputs : (type == MAX ? 2 : 1);
+        CV_Assert(numOutputs == 1 || (numOutputs == 2 && type == MAX));
+        outputs.assign(numOutputs, shape(dims, 4));

        return false;
    }
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@ -358,7 +358,7 @@ TEST_P(Test_TensorFlow_nets, Faster_RCNN)
        (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
        throw SkipTestException("");

-    for (int i = 1; i < 2; ++i)
+    for (int i = 0; i < 2; ++i)
    {
        std::string proto = findDataFile("dnn/" + names[i] + ".pbtxt", false);
        std::string model = findDataFile("dnn/" + names[i] + ".pb", false);
--- a/modules/features2d/src/fast.cpp
+++ b/modules/features2d/src/fast.cpp
@ -401,11 +401,11 @@ static bool openvx_FAST(InputArray _img, std::vector<KeyPoint>& keypoints,
        img.swapHandle();
 #endif
    }
-    catch (RuntimeError & e)
+    catch (const RuntimeError & e)
    {
        VX_DbgThrow(e.what());
    }
-    catch (WrapperError & e)
+    catch (const WrapperError & e)
    {
        VX_DbgThrow(e.what());
    }
--- a/modules/imgcodecs/src/bitstrm.cpp
+++ b/modules/imgcodecs/src/bitstrm.cpp
@ -99,7 +99,7 @@ void  RBaseStream::readBlock()
    {
        if( m_block_pos == 0 && m_current < m_end )
            return;
-        CV_THROW (RBS_THROW_EOS);
+        throw RBS_THROW_EOS;
    }

    fseek( m_file, m_block_pos, SEEK_SET );
@ -107,7 +107,7 @@ void  RBaseStream::readBlock()
    m_end = m_start + readed;

    if( readed == 0 || m_current >= m_end )
-        CV_THROW (RBS_THROW_EOS);
+        throw RBS_THROW_EOS;
 }


--- a/modules/imgcodecs/src/exif.cpp
+++ b/modules/imgcodecs/src/exif.cpp
@ -80,15 +80,14 @@ ExifReader::~ExifReader()
 */
 bool ExifReader::parse()
 {
-    CV_TRY {
+    try {  // getExif() signals malformed data by throwing ExifParsingError
        m_exif = getExif();
        if( !m_exif.empty() )
        {
            return true;
        }
        return false;
-    } CV_CATCH (ExifParsingError, e) {
-        CV_UNUSED(e);
+    } catch (const ExifParsingError&) {  // caught by const reference, like the other handlers; a parse failure is reported as false
        return false;
    }
 }
@ -152,11 +151,11 @@ std::map<int, ExifEntry_t > ExifReader::getExif()
            case COM:
                bytesToSkip = getFieldSize();
                if (bytesToSkip < markerSize) {
-                    CV_THROW (ExifParsingError());
+                    throw ExifParsingError();
                }
                m_stream.seekg( static_cast<long>( bytesToSkip - markerSize ), m_stream.cur );
                if ( m_stream.fail() ) {
-                    CV_THROW (ExifParsingError());
+                    throw ExifParsingError();
                }
                break;

@ -167,12 +166,12 @@ std::map<int, ExifEntry_t > ExifReader::getExif()
            case APP1: //actual Exif Marker
                exifSize = getFieldSize();
                if (exifSize <= offsetToTiffHeader) {
-                    CV_THROW (ExifParsingError());
+                    throw ExifParsingError();
                }
                m_data.resize( exifSize - offsetToTiffHeader );
                m_stream.seekg( static_cast<long>( offsetToTiffHeader ), m_stream.cur );
                if ( m_stream.fail() ) {
-                    CV_THROW (ExifParsingError());
+                    throw ExifParsingError();
                }
                m_stream.read( reinterpret_cast<char*>(&m_data[0]), exifSize - offsetToTiffHeader );
                exifFound = true;
@ -416,7 +415,7 @@ std::string ExifReader::getString(const size_t offset) const
        dataOffset = getU32( offset + 8 );
    }
    if (dataOffset > m_data.size() || dataOffset + size > m_data.size()) {
-        CV_THROW (ExifParsingError());
+        throw ExifParsingError();
    }
    std::vector<uint8_t>::const_iterator it = m_data.begin() + dataOffset;
    std::string result( it, it + size ); //copy vector content into result
@ -433,7 +432,7 @@ std::string ExifReader::getString(const size_t offset) const
 uint16_t ExifReader::getU16(const size_t offset) const
 {
    if (offset + 1 >= m_data.size())
-        CV_THROW (ExifParsingError());
+        throw ExifParsingError();

    if( m_format == INTEL )
    {
@ -451,7 +450,7 @@ uint16_t ExifReader::getU16(const size_t offset) const
 uint32_t ExifReader::getU32(const size_t offset) const
 {
    if (offset + 3 >= m_data.size())
-        CV_THROW (ExifParsingError());
+        throw ExifParsingError();

    if( m_format == INTEL )
    {
--- a/modules/imgcodecs/src/grfmt_bmp.cpp
+++ b/modules/imgcodecs/src/grfmt_bmp.cpp
@ -89,7 +89,7 @@ bool  BmpDecoder::readHeader()
    else if( !m_strm.open( m_filename ))
        return false;

-    CV_TRY
+    try
    {
        m_strm.skip( 10 );
        m_offset = m_strm.getDWord();
@ -173,9 +173,9 @@ bool  BmpDecoder::readHeader()
            }
        }
    }
-    CV_CATCH_ALL
+    catch(...)
    {
-        CV_RETHROW();
+        throw;
    }
    // in 32 bit case alpha channel is used - so require CV_8UC4 type
    m_type = iscolor ? (m_bpp == 32 ? CV_8UC4 : CV_8UC3 ) : CV_8UC1;
@ -225,7 +225,7 @@ bool  BmpDecoder::readData( Mat& img )
    }
    uchar *src = _src.data(), *bgr = _bgr.data();

-    CV_TRY
+    try
    {
        m_strm.setPos( m_offset );

@ -490,9 +490,9 @@ decode_rle8_bad: ;
            CV_Error(cv::Error::StsError, "Invalid/unsupported mode");
        }
    }
-    CV_CATCH_ALL
+    catch(...)
    {
-        CV_RETHROW();
+        throw;
    }

    return result;
--- a/modules/imgcodecs/src/grfmt_pam.cpp
+++ b/modules/imgcodecs/src/grfmt_pam.cpp
@ -379,25 +379,25 @@ bool  PAMDecoder::readHeader()
    }
    else if( !m_strm.open( m_filename ))
        return false;
-    CV_TRY
+    try
    {
        byte = m_strm.getByte();
        if( byte != 'P' )
-            CV_THROW( RBS_BAD_HEADER );
+            throw RBS_BAD_HEADER;

        byte = m_strm.getByte();
        if (byte != '7')
-            CV_THROW( RBS_BAD_HEADER );
+            throw RBS_BAD_HEADER;

        byte = m_strm.getByte();
        if (byte != '\n' && byte != '\r')
-            CV_THROW( RBS_BAD_HEADER );
+            throw RBS_BAD_HEADER;

        uint i;
        memset (&flds, 0x00, sizeof (struct parsed_fields));
        do {
            if (!ReadPAMHeaderLine(m_strm, fieldtype, value))
-                CV_THROW( RBS_BAD_HEADER );
+                throw RBS_BAD_HEADER;
            switch (fieldtype) {
                case PAM_HEADER_NONE:
                case PAM_HEADER_COMMENT:
@ -407,32 +407,32 @@ bool  PAMDecoder::readHeader()
                    break;
                case PAM_HEADER_HEIGHT:
                    if (flds.height)
-                        CV_THROW( RBS_BAD_HEADER );
+                        throw RBS_BAD_HEADER;
                    if (!ParseNumber (value, &m_height))
-                        CV_THROW( RBS_BAD_HEADER );
+                        throw RBS_BAD_HEADER;
                    flds.height = true;
                    break;
                case PAM_HEADER_WIDTH:
                    if (flds.width)
-                        CV_THROW( RBS_BAD_HEADER );
+                        throw RBS_BAD_HEADER;
                    if (!ParseNumber (value, &m_width))
-                        CV_THROW( RBS_BAD_HEADER );
+                        throw RBS_BAD_HEADER;
                    flds.width = true;
                    break;
                case PAM_HEADER_DEPTH:
                    if (flds.depth)
-                        CV_THROW( RBS_BAD_HEADER );
+                        throw RBS_BAD_HEADER;
                    if (!ParseNumber (value, &m_channels))
-                        CV_THROW( RBS_BAD_HEADER );
+                        throw RBS_BAD_HEADER;
                    flds.depth = true;
                    break;
                case PAM_HEADER_MAXVAL:
                    if (flds.maxval)
-                        CV_THROW( RBS_BAD_HEADER );
+                        throw RBS_BAD_HEADER;
                    if (!ParseNumber (value, &m_maxval))
-                        CV_THROW( RBS_BAD_HEADER );
+                        throw RBS_BAD_HEADER;
                    if ( m_maxval > 65535 )
-                        CV_THROW( RBS_BAD_HEADER );
+                        throw RBS_BAD_HEADER;
                    if ( m_maxval > 255 ) {
                        m_sampledepth = CV_16U;
                    }
@ -451,7 +451,7 @@ bool  PAMDecoder::readHeader()
                    }
                    break;
                default:
-                    CV_THROW( RBS_BAD_HEADER );
+                    throw RBS_BAD_HEADER;
            }
        } while (fieldtype != PAM_HEADER_ENDHDR);

@ -469,7 +469,7 @@ bool  PAMDecoder::readHeader()

            return true;
        }
-    } CV_CATCH_ALL
+    } catch(...)
    {
    }

@ -512,7 +512,7 @@ bool  PAMDecoder::readData( Mat& img )
        }
    }

-    CV_TRY
+    try
    {
        m_strm.setPos( m_offset );

@ -610,7 +610,7 @@ bool  PAMDecoder::readData( Mat& img )
        }

        res = true;
-    } CV_CATCH_ALL
+    } catch(...)
    {
    }

--- a/modules/imgcodecs/src/grfmt_pxm.cpp
+++ b/modules/imgcodecs/src/grfmt_pxm.cpp
@ -150,11 +150,11 @@ bool PxMDecoder::readHeader()
    else if( !m_strm.open( m_filename ))
        return false;

-    CV_TRY
+    try
    {
        int code = m_strm.getByte();
        if( code != 'P' )
-            CV_THROW (RBS_BAD_HEADER);
+            throw RBS_BAD_HEADER;

        code = m_strm.getByte();
        switch( code )
@ -162,7 +162,7 @@ bool PxMDecoder::readHeader()
        case '1': case '4': m_bpp = 1; break;
        case '2': case '5': m_bpp = 8; break;
        case '3': case '6': m_bpp = 24; break;
-        default: CV_THROW (RBS_BAD_HEADER);
+        default: throw RBS_BAD_HEADER;
        }

        m_binary = code >= '4';
@ -173,7 +173,7 @@ bool PxMDecoder::readHeader()

        m_maxval = m_bpp == 1 ? 1 : ReadNumber(m_strm);
        if( m_maxval > 65535 )
-            CV_THROW (RBS_BAD_HEADER);
+            throw RBS_BAD_HEADER;

        //if( m_maxval > 255 ) m_binary = false; nonsense
        if( m_maxval > 255 )
@ -185,15 +185,14 @@ bool PxMDecoder::readHeader()
            result = true;
        }
    }
-    CV_CATCH (cv::Exception, e)
+    catch (const cv::Exception&)
    {
-        CV_UNUSED(e);
-        CV_RETHROW();
+        throw;
    }
-    CV_CATCH_ALL
+    catch (...)
    {
        std::cerr << "PXM::readHeader(): unknown C++ exception" << std::endl << std::flush;
-        CV_RETHROW();
+        throw;
    }

    if( !result )
@ -233,7 +232,7 @@ bool PxMDecoder::readData( Mat& img )
        FillGrayPalette( palette, m_bpp==1 ? 1 : 8 , m_bpp == 1 );
    }

-    CV_TRY
+    try
    {
        m_strm.setPos( m_offset );

@ -359,15 +358,14 @@ bool PxMDecoder::readData( Mat& img )
            CV_Error(Error::StsError, "m_bpp is not supported");
        }
    }
-    CV_CATCH (cv::Exception, e)
+    catch (const cv::Exception&)
    {
-        CV_UNUSED(e);
-        CV_RETHROW();
+        throw;
    }
-    CV_CATCH_ALL
+    catch (...)
    {
        std::cerr << "PXM::readData(): unknown exception" << std::endl << std::flush;
-        CV_RETHROW();
+        throw;
    }

    return result;
--- a/modules/imgcodecs/src/grfmt_sunras.cpp
+++ b/modules/imgcodecs/src/grfmt_sunras.cpp
@ -84,7 +84,7 @@ bool  SunRasterDecoder::readHeader()

    if( !m_strm.open( m_filename )) return false;

-    CV_TRY
+    try
    {
        m_strm.skip( 4 );
        m_width  = m_strm.getDWord();
@ -144,7 +144,7 @@ bool  SunRasterDecoder::readHeader()
            }
        }
    }
-    CV_CATCH_ALL
+    catch(...)
    {
    }

@ -179,7 +179,7 @@ bool  SunRasterDecoder::readData( Mat& img )
    if( !color && m_maptype == RMT_EQUAL_RGB )
        CvtPaletteToGray( m_palette, gray_palette, 1 << m_bpp );

-    CV_TRY
+    try
    {
        m_strm.setPos( m_offset );

@ -376,7 +376,7 @@ bad_decoding_end:
            CV_Error(Error::StsInternal, "");
        }
    }
-    CV_CATCH_ALL
+    catch( ... )
    {
    }

--- a/modules/imgcodecs/src/loadsave.cpp
+++ b/modules/imgcodecs/src/loadsave.cpp
@ -433,18 +433,18 @@ imread_( const String& filename, int flags, Mat& mat )
    /// set the filename in the driver
    decoder->setSource( filename );

-    CV_TRY
+    try
    {
        // read the header to make sure it succeeds
        if( !decoder->readHeader() )
            return 0;
    }
-    CV_CATCH (cv::Exception, e)
+    catch (const cv::Exception& e)
    {
        std::cerr << "imread_('" << filename << "'): can't read header: " << e.what() << std::endl << std::flush;
        return 0;
    }
-    CV_CATCH_ALL
+    catch (...)
    {
        std::cerr << "imread_('" << filename << "'): can't read header: unknown exception" << std::endl << std::flush;
        return 0;
@ -472,16 +472,16 @@ imread_( const String& filename, int flags, Mat& mat )

    // read the image data
    bool success = false;
-    CV_TRY
+    try
    {
        if (decoder->readData(mat))
            success = true;
    }
-    CV_CATCH (cv::Exception, e)
+    catch (const cv::Exception& e)
    {
        std::cerr << "imread_('" << filename << "'): can't read data: " << e.what() << std::endl << std::flush;
    }
-    CV_CATCH_ALL
+    catch (...)
    {
        std::cerr << "imread_('" << filename << "'): can't read data: unknown exception" << std::endl << std::flush;
    }
@ -534,18 +534,18 @@ imreadmulti_(const String& filename, int flags, std::vector<Mat>& mats)
    decoder->setSource(filename);

    // read the header to make sure it succeeds
-    CV_TRY
+    try
    {
        // read the header to make sure it succeeds
        if( !decoder->readHeader() )
            return 0;
    }
-    CV_CATCH (cv::Exception, e)
+    catch (const cv::Exception& e)
    {
        std::cerr << "imreadmulti_('" << filename << "'): can't read header: " << e.what() << std::endl << std::flush;
        return 0;
    }
-    CV_CATCH_ALL
+    catch (...)
    {
        std::cerr << "imreadmulti_('" << filename << "'): can't read header: unknown exception" << std::endl << std::flush;
        return 0;
@ -573,16 +573,16 @@ imreadmulti_(const String& filename, int flags, std::vector<Mat>& mats)
        // read the image data
        Mat mat(size.height, size.width, type);
        bool success = false;
-        CV_TRY
+        try
        {
            if (decoder->readData(mat))
                success = true;
        }
-        CV_CATCH (cv::Exception, e)
+        catch (const cv::Exception& e)
        {
            std::cerr << "imreadmulti_('" << filename << "'): can't read data: " << e.what() << std::endl << std::flush;
        }
-        CV_CATCH_ALL
+        catch (...)
        {
            std::cerr << "imreadmulti_('" << filename << "'): can't read data: unknown exception" << std::endl << std::flush;
        }
@ -749,16 +749,16 @@ imdecode_( const Mat& buf, int flags, Mat& mat )
    }

    bool success = false;
-    CV_TRY
+    try
    {
        if (decoder->readHeader())
            success = true;
    }
-    CV_CATCH (cv::Exception, e)
+    catch (const cv::Exception& e)
    {
        std::cerr << "imdecode_('" << filename << "'): can't read header: " << e.what() << std::endl << std::flush;
    }
-    CV_CATCH_ALL
+    catch (...)
    {
        std::cerr << "imdecode_('" << filename << "'): can't read header: unknown exception" << std::endl << std::flush;
    }
@ -794,16 +794,16 @@ imdecode_( const Mat& buf, int flags, Mat& mat )
    mat.create( size.height, size.width, type );

    success = false;
-    CV_TRY
+    try
    {
        if (decoder->readData(mat))
            success = true;
    }
-    CV_CATCH (cv::Exception, e)
+    catch (const cv::Exception& e)
    {
        std::cerr << "imdecode_('" << filename << "'): can't read data: " << e.what() << std::endl << std::flush;
    }
-    CV_CATCH_ALL
+    catch (...)
    {
        std::cerr << "imdecode_('" << filename << "'): can't read data: unknown exception" << std::endl << std::flush;
    }
--- a/modules/imgproc/src/accum.cpp
+++ b/modules/imgproc/src/accum.cpp
@ -291,11 +291,11 @@ static bool openvx_accumulate(InputArray _src, InputOutputArray _dst, InputArray
        srcImage.swapHandle(); dstImage.swapHandle();
 #endif
    }
-    catch (ivx::RuntimeError & e)
+    catch (const ivx::RuntimeError & e)
    {
        VX_DbgThrow(e.what());
    }
-    catch (ivx::WrapperError & e)
+    catch (const ivx::WrapperError & e)
    {
        VX_DbgThrow(e.what());
    }
--- a/modules/imgproc/src/box_filter.cpp
+++ b/modules/imgproc/src/box_filter.cpp
--- a/modules/imgproc/src/contours.cpp
+++ b/modules/imgproc/src/contours.cpp
@ -1821,7 +1821,7 @@ cvFindContours_Impl( void*  img,  CvMemStorage*  storage,
    }
    else
    {
-        CV_TRY
+        try
        {
            scanner = cvStartFindContours_Impl( img, storage, cntHeaderSize, mode, method, offset,
                                            needFillBorder);
@ -1833,11 +1833,11 @@ cvFindContours_Impl( void*  img,  CvMemStorage*  storage,
            }
            while( contour != 0 );
        }
-        CV_CATCH_ALL
+        catch(...)
        {
            if( scanner )
                cvEndFindContours(&scanner);
-            CV_RETHROW();
+            throw;
        }

        *firstContour = cvEndFindContours( &scanner );
--- a/modules/imgproc/src/deriv.cpp
+++ b/modules/imgproc/src/deriv.cpp
@ -246,11 +246,11 @@ namespace cv
                ivx::IVX_CHECK_STATUS(vxuSobel3x3(ctx, ia, NULL, ib));
            ctx.setImmediateBorder(prevBorder);
        }
-        catch (ivx::RuntimeError & e)
+        catch (const ivx::RuntimeError & e)
        {
            VX_DbgThrow(e.what());
        }
-        catch (ivx::WrapperError & e)
+        catch (const ivx::WrapperError & e)
        {
            VX_DbgThrow(e.what());
        }
--- a/modules/imgproc/src/featureselect.cpp
+++ b/modules/imgproc/src/featureselect.cpp
@ -338,11 +338,11 @@ static bool openvx_harris(Mat image, OutputArray _corners,
        ovxImage.swapHandle();
 #endif
    }
-    catch (RuntimeError & e)
+    catch (const RuntimeError & e)
    {
        VX_DbgThrow(e.what());
    }
-    catch (WrapperError & e)
+    catch (const WrapperError & e)
    {
        VX_DbgThrow(e.what());
    }
--- a/modules/imgproc/src/histogram.cpp
+++ b/modules/imgproc/src/histogram.cpp
@ -793,11 +793,11 @@ namespace cv
            img.swapHandle();
 #endif
        }
-        catch (ivx::RuntimeError & e)
+        catch (const ivx::RuntimeError & e)
        {
            VX_DbgThrow(e.what());
        }
-        catch (ivx::WrapperError & e)
+        catch (const ivx::WrapperError & e)
        {
            VX_DbgThrow(e.what());
        }
@ -3313,11 +3313,11 @@ static bool openvx_equalize_hist(Mat srcMat, Mat dstMat)
        srcImage.swapHandle(); dstImage.swapHandle();
 #endif
    }
-    catch (RuntimeError & e)
+    catch (const RuntimeError & e)
    {
        VX_DbgThrow(e.what());
    }
-    catch (WrapperError & e)
+    catch (const WrapperError & e)
    {
        VX_DbgThrow(e.what());
    }
--- a/modules/imgproc/src/imgwarp.cpp
+++ b/modules/imgproc/src/imgwarp.cpp
@ -1598,12 +1598,12 @@ static bool openvx_remap(Mat src, Mat dst, Mat map1, Mat map2, int interpolation

        ctx.setImmediateBorder(prevBorder);
    }
-    catch (ivx::RuntimeError & e)
+    catch (const ivx::RuntimeError & e)
    {
        CV_Error(CV_StsInternal, e.what());
        return false;
    }
-    catch (ivx::WrapperError & e)
+    catch (const ivx::WrapperError & e)
    {
        CV_Error(CV_StsInternal, e.what());
        return false;
--- a/modules/imgproc/src/median_blur.cpp
+++ b/modules/imgproc/src/median_blur.cpp
@ -1068,11 +1068,11 @@ static bool openvx_medianFilter(InputArray _src, OutputArray _dst, int ksize)
 #endif
        ctx.setImmediateBorder(prevBorder);
    }
-    catch (ivx::RuntimeError & e)
+    catch (const ivx::RuntimeError & e)
    {
        VX_DbgThrow(e.what());
    }
-    catch (ivx::WrapperError & e)
+    catch (const ivx::WrapperError & e)
    {
        VX_DbgThrow(e.what());
    }
--- a/modules/imgproc/src/pyramids.cpp
+++ b/modules/imgproc/src/pyramids.cpp
@ -861,11 +861,11 @@ static bool openvx_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz,
        srcImg.swapHandle(); dstImg.swapHandle();
 #endif
    }
-    catch (RuntimeError & e)
+    catch (const RuntimeError & e)
    {
        VX_DbgThrow(e.what());
    }
-    catch (WrapperError & e)
+    catch (const WrapperError & e)
    {
        VX_DbgThrow(e.what());
    }
--- a/modules/imgproc/src/smooth.cpp
+++ b/modules/imgproc/src/smooth.cpp
--- a/modules/imgproc/src/thresh.cpp
+++ b/modules/imgproc/src/thresh.cpp
@ -1357,11 +1357,11 @@ static bool openvx_threshold(Mat src, Mat dst, int thresh, int maxval, int type)
        }
 #endif
    }
-    catch (ivx::RuntimeError & e)
+    catch (const ivx::RuntimeError & e)
    {
        VX_DbgThrow(e.what());
    }
-    catch (ivx::WrapperError & e)
+    catch (const ivx::WrapperError & e)
    {
        VX_DbgThrow(e.what());
    }
--- a/modules/objdetect/include/opencv2/objdetect.hpp
+++ b/modules/objdetect/include/opencv2/objdetect.hpp
@ -713,7 +713,6 @@ protected:
 };

 //! @} objdetect
-
 }

 #include "opencv2/objdetect/detection_based_tracker.hpp"
--- a/modules/objdetect/src/detection_based_tracker.cpp
+++ b/modules/objdetect/src/detection_based_tracker.cpp
@ -209,23 +209,23 @@ bool cv::DetectionBasedTracker::SeparateDetectionWork::run()
 }

 #define CATCH_ALL_AND_LOG(_block)                                                           \
-    CV_TRY {                                                                                   \
+    try {                                                                                   \
        _block;                                                                             \
    }                                                                                       \
-    CV_CATCH(cv::Exception, e) {                                                               \
+    catch(const cv::Exception& e) {                                                         \
        LOGE0("\n %s: ERROR: OpenCV Exception caught: \n'%s'\n\n", CV_Func, e.what());      \
-    } CV_CATCH(std::exception, e) {                                                            \
+    } catch(const std::exception& e) {                                                      \
        LOGE0("\n %s: ERROR: Exception caught: \n'%s'\n\n", CV_Func, e.what());             \
-    } CV_CATCH_ALL {                                                                          \
+    } catch(...) {                                                                          \
        LOGE0("\n %s: ERROR: UNKNOWN Exception caught\n\n", CV_Func);                       \
    }

 void* cv::workcycleObjectDetectorFunction(void* p)
 {
    CATCH_ALL_AND_LOG({ ((cv::DetectionBasedTracker::SeparateDetectionWork*)p)->workcycleObjectDetector(); });
-    CV_TRY{
+    try{
        ((cv::DetectionBasedTracker::SeparateDetectionWork*)p)->init();
-    } CV_CATCH_ALL {
+    } catch(...) {
        LOGE0("DetectionBasedTracker: workcycleObjectDetectorFunction: ERROR concerning pointer, received as the function parameter");
    }
    return NULL;
--- a/modules/objdetect/src/qrcode.cpp
+++ b/modules/objdetect/src/qrcode.cpp
@ -1059,7 +1059,7 @@ bool QRDecode::fullDecodingProcess()
 #endif
 }

-CV_EXPORTS std::string QRCodeDetector::decode(InputArray in, InputArray points,
+std::string QRCodeDetector::decode(InputArray in, InputArray points,
                                   OutputArray straight_qrcode)
 {
    Mat inarr = in.getMat();
@ -1096,7 +1096,7 @@ CV_EXPORTS std::string QRCodeDetector::decode(InputArray in, InputArray points,
    return ok ? decoded_info : std::string();
 }

-CV_EXPORTS std::string QRCodeDetector::detectAndDecode(InputArray in,
+std::string QRCodeDetector::detectAndDecode(InputArray in,
                                            OutputArray points_,
                                            OutputArray straight_qrcode)
 {
@ -1127,5 +1127,4 @@ CV_EXPORTS std::string QRCodeDetector::detectAndDecode(InputArray in,
    return decoded_info;
 }

-
 }
--- a/modules/ts/include/opencv2/ts/ts_ext.hpp
+++ b/modules/ts/include/opencv2/ts/ts_ext.hpp
@ -39,7 +39,7 @@ extern int testThreads;
          Body(); \
          CV__TEST_CLEANUP \
       } \
-       catch (cvtest::SkipTestException& e) \
+       catch (const cvtest::SkipTestException& e) \
       { \
          printf("[     SKIP ] %s\n", e.what()); \
       } \
@ -87,7 +87,7 @@ extern int testThreads;
          Body(); \
          CV__TEST_CLEANUP \
       } \
-       catch (cvtest::SkipTestException& e) \
+       catch (const cvtest::SkipTestException& e) \
       { \
          printf("[     SKIP ] %s\n", e.what()); \
       } \
--- a/modules/ts/src/ts_perf.cpp
+++ b/modules/ts/src/ts_perf.cpp
@ -232,7 +232,7 @@ void Regression::init(const std::string& testSuitName, const std::string& ext)
            storageOutPath += ext;
        }
    }
-    catch(cv::Exception&)
+    catch(const cv::Exception&)
    {
        LOGE("Failed to open sanity data for reading: %s", storageInPath.c_str());
    }
@ -1987,22 +1987,22 @@ void TestBase::RunPerfTestBody()
            implConf.GetImpl();
 #endif
    }
-    catch(SkipTestException&)
+    catch(const SkipTestException&)
    {
        metrics.terminationReason = performance_metrics::TERM_SKIP_TEST;
        return;
    }
-    catch(PerfSkipTestException&)
+    catch(const PerfSkipTestException&)
    {
        metrics.terminationReason = performance_metrics::TERM_SKIP_TEST;
        return;
    }
-    catch(PerfEarlyExitException&)
+    catch(const PerfEarlyExitException&)
    {
        metrics.terminationReason = performance_metrics::TERM_INTERRUPT;
        return;//no additional failure logging
    }
-    catch(cv::Exception& e)
+    catch(const cv::Exception& e)
    {
        metrics.terminationReason = performance_metrics::TERM_EXCEPTION;
        #ifdef HAVE_CUDA
@ -2011,7 +2011,7 @@ void TestBase::RunPerfTestBody()
        #endif
        FAIL() << "Expected: PerfTestBody() doesn't throw an exception.\n  Actual: it throws cv::Exception:\n  " << e.what();
    }
-    catch(std::exception& e)
+    catch(const std::exception& e)
    {
        metrics.terminationReason = performance_metrics::TERM_EXCEPTION;
        FAIL() << "Expected: PerfTestBody() doesn't throw an exception.\n  Actual: it throws std::exception:\n  " << e.what();
--- a/modules/video/src/lkpyramid.cpp
+++ b/modules/video/src/lkpyramid.cpp
@ -1189,11 +1189,11 @@ namespace
        prevImg.swapHandle(); nextImg.swapHandle();
 #endif
        }
-        catch (RuntimeError & e)
+        catch (const RuntimeError & e)
        {
            VX_DbgThrow(e.what());
        }
-        catch (WrapperError & e)
+        catch (const WrapperError & e)
        {
            VX_DbgThrow(e.what());
        }
--- a/modules/videoio/include/opencv2/videoio.hpp
+++ b/modules/videoio/include/opencv2/videoio.hpp
@ -169,8 +169,10 @@ enum VideoCaptureProperties {
       CAP_PROP_AUTOFOCUS     =39,
       CAP_PROP_SAR_NUM       =40, //!< Sample aspect ratio: num/den (num)
       CAP_PROP_SAR_DEN       =41, //!< Sample aspect ratio: num/den (den)
-       CAP_PROP_BACKEND       =42, //!< current backend (enum VideoCaptureAPIs). Read-only property
-       CAP_CROSSBAR_INPIN_TYPE =43, //!<CrossBar input pin Setting
+       CAP_PROP_BACKEND       =42, //!< Current backend (enum VideoCaptureAPIs). Read-only property
+       CAP_PROP_CHANNEL       =43, //!< Video input or channel number (only for cameras that support it)
+       CAP_PROP_AUTO_WB       =44, //!< Enable/disable auto white-balance
+       CAP_PROP_WB_TEMPERATURE=45, //!< White-balance color temperature
 #ifndef CV_DOXYGEN
       CV__CAP_PROP_LATEST
 #endif
--- a/modules/videoio/src/cap_dshow.cpp
+++ b/modules/videoio/src/cap_dshow.cpp
@ -141,6 +141,10 @@ DEFINE_GUID(MEDIASUBTYPE_Y8, 0x20203859, 0x0000, 0x0010, 0x80, 0x00,
    0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71);
 DEFINE_GUID(MEDIASUBTYPE_Y800, 0x30303859, 0x0000, 0x0010, 0x80, 0x00,
    0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71);
+DEFINE_GUID(MEDIASUBTYPE_Y16, 0x20363159, 0x0000, 0x0010, 0x80, 0x00,
+    0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71);
+DEFINE_GUID(MEDIASUBTYPE_BY8, 0x20385942, 0x0000, 0x0010, 0x80, 0x00,
+    0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71);

 DEFINE_GUID(CLSID_CaptureGraphBuilder2,0xbf87b6e1,0x8c27,0x11d0,0xb3,0xf0,0x00,0xaa,0x00,0x37,0x61,0xc5);
 DEFINE_GUID(CLSID_FilterGraph,0xe436ebb3,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70);
@ -333,7 +337,7 @@ static void DebugPrintOut(const char *format, ...)
 //videoInput defines
 #define VI_VERSION      0.1995
 #define VI_MAX_CAMERAS  20
-#define VI_NUM_TYPES    20 //MGB
+#define VI_NUM_TYPES    22 //MGB
 #define VI_NUM_FORMATS  18 //DON'T TOUCH

 //defines for setPhyCon - tuner is not as well supported as composite and s-video
@ -427,6 +431,7 @@ class videoDevice{
        bool setupStarted;
        bool specificFormat;
        bool autoReconnect;
+        bool convertRGB;
        int  nFramesForReconnect;
        unsigned long nFramesRunning;
        int  connection;
@ -522,6 +527,10 @@ class videoInput{
        int  getFourcc(int deviceID) const;
        double getFPS(int deviceID) const;

+        // RGB conversion setting
+        bool getConvertRGB(int deviceID);
+        bool setConvertRGB(int deviceID, bool enable);
+
        //completely stops and frees a device
        void stopDevice(int deviceID);

@ -539,11 +548,13 @@ class videoInput{

        int property_window_count(int device_idx);

+        GUID getMediasubtype(int deviceID);
+
    private:
        void setPhyCon(int deviceID, int conn);
        void setAttemptCaptureSize(int deviceID, int w, int h,GUID mediaType=MEDIASUBTYPE_RGB24);
        bool setup(int deviceID);
-        void processPixels(unsigned char * src, unsigned char * dst, int width, int height, bool bRGB, bool bFlip);
+        void processPixels(unsigned char * src, unsigned char * dst, int width, int height, bool bRGB, bool bFlip, int bytesperpixel = 3);
        int  start(int deviceID, videoDevice * VD);
        int  getDeviceCount();
        void getMediaSubtypeAsString(GUID type, char * typeAsString);
@ -586,6 +597,24 @@ class videoInput{

 ///////////////////////////  HANDY FUNCTIONS  /////////////////////////////

+// Added by e-con Systems.
+// Returns true when formatType is one of the single-byte-per-pixel subtypes
+// tested below: MEDIASUBTYPE_Y800, MEDIASUBTYPE_Y8 or MEDIASUBTYPE_GREY.
+static bool checkSingleByteFormat(GUID formatType)
+{
+
+    if (formatType == MEDIASUBTYPE_Y800 ||
+        formatType == MEDIASUBTYPE_Y8 ||
+        formatType == MEDIASUBTYPE_GREY)
+    {
+        return true;
+    }
+    else
+    {
+        return false;
+    }
+}
+
 static void MyFreeMediaType(AM_MEDIA_TYPE& mt){
    if (mt.cbFormat != 0)
    {
@ -761,6 +790,7 @@ videoDevice::videoDevice(){
     setupStarted       = false;
     specificFormat     = false;
     autoReconnect      = false;
+     convertRGB         = true;
     requestedFrameTime = -1;

     pBuffer = 0;
@ -788,7 +818,20 @@ void videoDevice::setSize(int w, int h){
    {
        width               = w;
        height              = h;
+
+        if (checkSingleByteFormat(pAmMediaType->subtype))
+        {
+            videoSize      = w * h;
+        }
+        else if (pAmMediaType->subtype == MEDIASUBTYPE_Y16)
+        {
+            videoSize      = w * h * 2;
+        }
+        else
+        {
            videoSize      = w * h * 3;
+        }
+
        sizeSet             = true;
        pixels              = new unsigned char[videoSize];
        pBuffer             = new char[videoSize];
@ -1060,6 +1103,8 @@ videoInput::videoInput(){
    mediaSubtypes[17]    = MEDIASUBTYPE_Y8;
    mediaSubtypes[18]    = MEDIASUBTYPE_GREY;
    mediaSubtypes[19]    = MEDIASUBTYPE_I420;
+    mediaSubtypes[20] = MEDIASUBTYPE_BY8;
+    mediaSubtypes[21] = MEDIASUBTYPE_Y16;

    //The video formats we support
    formatTypes[VI_NTSC_M]      = AnalogVideo_NTSC_M;
@ -1181,6 +1226,9 @@ bool videoInput::setupDeviceFourcc(int deviceNumber, int w, int h,int fourcc){
        GUID *mediaType = getMediaSubtypeFromFourcc(fourcc);
        if ( mediaType ) {
            setAttemptCaptureSize(deviceNumber,w,h,*mediaType);
+        } else {
+            DebugPrintOut("SETUP: Unknown GUID \n");
+            return false;
        }
    } else {
        setAttemptCaptureSize(deviceNumber,w,h);
@ -1448,6 +1496,37 @@ int videoInput::getSize(int id) const

 }

+// ----------------------------------------------------------------------
+// Accessors for the per-device convertRGB flag (set to true in the
+// videoDevice constructor). Both return false if the device is not set up.
+// ----------------------------------------------------------------------
+
+bool videoInput::getConvertRGB(int id)
+{
+    if (isDeviceSetup(id))
+    {
+        return VDList[id]->convertRGB;  // current flag value for this device
+    }
+    else
+    {
+        return false;  // device not set up: reported as false
+    }
+
+}
+
+bool videoInput::setConvertRGB(int id, bool enable)  // returns true if the flag was stored
+{
+    if (isDeviceSetup(id))
+    {
+        VDList[id]->convertRGB = enable;  // read by getPixels() when the subtype is MEDIASUBTYPE_Y16
+        return true;
+    }
+    else
+    {
+        return false;  // device not set up: nothing is changed
+    }
+}
+

 // ----------------------------------------------------------------------
 // Uses a supplied buffer
@ -1472,7 +1551,24 @@ bool videoInput::getPixels(int id, unsigned char * dstBuffer, bool flipRedAndBlu
                int height             = VDList[id]->height;
                int width              = VDList[id]->width;

+                // 8-bit subtypes are copied unchanged; Y16 is copied raw or expanded to 3 bytes per pixel depending on convertRGB (e-Con Systems)
+                if (checkSingleByteFormat(VDList[id]->pAmMediaType->subtype))
+                {
+                    memcpy(dst, src, width * height);
+                }
+                else if (VDList[id]->pAmMediaType->subtype == MEDIASUBTYPE_Y16)
+                {
+                    if (!VDList[id]->convertRGB) {
+                        memcpy(dst, src, width * height * 2);
+                    }
+                    else {
+                        processPixels(src, dst, width, height, flipRedAndBlue, flipImage, 2);
+                    }
+                }
+                else
+                {
                    processPixels(src, dst, width, height, flipRedAndBlue, flipImage);
+                }
                VDList[id]->sgCallback->newFrame = false;

            LeaveCriticalSection(&VDList[id]->sgCallback->critSection);
@ -2112,11 +2208,29 @@ bool videoInput::setup(int deviceNumber){
 // You have any combination of those.
 // ----------------------------------------------------------------------

-void videoInput::processPixels(unsigned char * src, unsigned char * dst, int width, int height, bool bRGB, bool bFlip){
+void videoInput::processPixels(unsigned char * src, unsigned char * dst, int width, int height, bool bRGB, bool bFlip, int bytesperpixel){

-    int widthInBytes = width * 3;
+    int widthInBytes = width * bytesperpixel;
    int numBytes = widthInBytes * height;

+    if (bytesperpixel == 2) {
+        for (int i = 0; i < width*height; i++) {
+            if (bytesperpixel == 2) {
+                *dst = (uint8_t) (*((uint16_t*) src) >> 8);
+                dst++;
+
+                *dst = (uint8_t) (*((uint16_t*)src) >> 8);
+                dst++;
+
+                *dst = (uint8_t) (*((uint16_t*)src) >> 8);
+                dst++;
+
+                src += 2;
+            }
+        }
+    }
+    else
+    {
        if(!bRGB){

            //int x = 0;
@ -2172,6 +2286,7 @@ void videoInput::processPixels(unsigned char * src, unsigned char * dst, int wid
            }
        }
    }
+}


 //------------------------------------------------------------------------------------------
@ -2198,6 +2313,8 @@ void videoInput::getMediaSubtypeAsString(GUID type, char * typeAsString){
    else if(type == MEDIASUBTYPE_Y8)    sprintf(tmpStr, "Y8");
    else if(type == MEDIASUBTYPE_GREY)  sprintf(tmpStr, "GREY");
    else if(type == MEDIASUBTYPE_I420)  sprintf(tmpStr, "I420");
+    else if (type == MEDIASUBTYPE_BY8)  sprintf(tmpStr, "BY8");
+    else if (type == MEDIASUBTYPE_Y16)  sprintf(tmpStr, "Y16");
    else sprintf(tmpStr, "OTHER");

    memcpy(typeAsString, tmpStr, sizeof(char)*8);
@ -2339,6 +2456,10 @@ void videoInput::getCameraPropertyAsString(int prop, char * propertyAsString){
    memcpy(propertyAsString, tmpStr, sizeof(char)*16);
 }

+GUID videoInput::getMediasubtype(int deviceID)  // returns the subtype GUID held in the device's pAmMediaType
+{
+    return VDList[deviceID]->pAmMediaType->subtype;  // NOTE(review): no isDeviceSetup()/null check here, unlike getConvertRGB() - confirm callers only pass a configured device
+}

 //-------------------------------------------------------------------------------------------
 static void findClosestSizeAndSubtype(videoDevice * VD, int widthIn, int heightIn, int &widthOut, int &heightOut, GUID & mediatypeOut){
@ -2729,7 +2850,17 @@ int videoInput::start(int deviceID, videoDevice *VD){
    ZeroMemory(&mt,sizeof(AM_MEDIA_TYPE));

    mt.majortype     = MEDIATYPE_Video;
-    mt.subtype         = MEDIASUBTYPE_RGB24;
+
+    // Disable format conversion if using 8/16-bit data (e-Con systems)
+    if (checkSingleByteFormat(VD->pAmMediaType->subtype) || (VD->pAmMediaType->subtype == MEDIASUBTYPE_Y16)) {
+        DebugPrintOut("SETUP: Not converting frames to RGB.\n");
+        mt.subtype = VD->pAmMediaType->subtype;
+    }
+    else
+    {
+        DebugPrintOut("SETUP: Converting frames to RGB.\n");
+        mt.subtype = MEDIASUBTYPE_RGB24;	//Making it RGB24, does conversion from YUV to RGB
+    }
    mt.formattype     = FORMAT_VideoInfo;

    //VD->pAmMediaType->subtype = VD->videoType;
@ -3270,15 +3401,22 @@ bool VideoCapture_DShow::setProperty(int propIdx, double propVal)

    case CV_CAP_PROP_FOURCC:
        m_fourcc = (int)(unsigned long)(propVal);
+        m_width = (int)getProperty(CAP_PROP_FRAME_WIDTH);
+        m_height = (int)getProperty(CAP_PROP_FRAME_HEIGHT);
+
        if (-1 == m_fourcc)
        {
            // following cvCreateVideo usage will pop up caprturepindialog here if fourcc=-1
            // TODO - how to create a capture pin dialog
        }
+        else
+        {
            handled = true;
+        }
+
        break;

-    case CAP_CROSSBAR_INPIN_TYPE:
+    case CAP_PROP_CHANNEL:

        if (cvFloor(propVal) < 0)
            break;
@ -3312,6 +3450,12 @@ bool VideoCapture_DShow::setProperty(int propIdx, double propVal)
        }
        return g_VI.setVideoSettingCamera(m_index, CameraControl_Focus, currentFocus, enabled ? CameraControl_Flags_Auto | CameraControl_Flags_Manual : CameraControl_Flags_Manual, enabled ? true : false);
    }
+
+    case CV_CAP_PROP_CONVERT_RGB:
+    {
+        return g_VI.setConvertRGB(m_index, cvRound(propVal) == 1);
+    }
+
    }

    if (handled)
@ -3319,7 +3463,7 @@ bool VideoCapture_DShow::setProperty(int propIdx, double propVal)
        // a stream setting
        if (m_width > 0 && m_height > 0)
        {
-            if (m_width != g_VI.getWidth(m_index) || m_height != g_VI.getHeight(m_index) )//|| fourcc != VI.getFourcc(index) )
+            if (m_width != g_VI.getWidth(m_index) || m_height != g_VI.getHeight(m_index) || m_fourcc != g_VI.getFourcc(m_index) )
            {
                int fps = static_cast<int>(g_VI.getFPS(m_index));
                g_VI.stopDevice(m_index);
@ -3330,10 +3474,14 @@ bool VideoCapture_DShow::setProperty(int propIdx, double propVal)
            bool success = g_VI.isDeviceSetup(m_index);
            if (success)
            {
+                DebugPrintOut("SETUP: Updated FourCC\n");
                m_widthSet = m_width;
                m_heightSet = m_height;
                m_width = m_height = m_fourcc = -1;
            }
+            else {
+                DebugPrintOut("SETUP: Couldn't update FourCC\n");
+            }
            return success;
        }
        return true;
@ -3383,7 +3531,18 @@ bool VideoCapture_DShow::grabFrame()
 }
 bool VideoCapture_DShow::retrieveFrame(int, OutputArray frame)
 {
-    frame.create(Size(g_VI.getWidth(m_index), g_VI.getHeight(m_index)), CV_8UC3);
+    int w = g_VI.getWidth(m_index), h = g_VI.getHeight(m_index);
+    bool convertRGB = g_VI.getConvertRGB(m_index);  // false when the device is not set up
+
+    // Choose the output Mat type from the device's media subtype (e-Con Systems):
+    if (checkSingleByteFormat(g_VI.getMediasubtype(m_index))){  // Y800 / Y8 / GREY
+        frame.create(Size(w, h), CV_8UC1);
+    } else if (g_VI.getMediasubtype(m_index) == MEDIASUBTYPE_Y16 && !convertRGB) {  // raw 16-bit samples requested
+        frame.create(Size(w, h), CV_16UC1);
+    } else {  // everything else, including Y16 with convertRGB enabled
+        frame.create(Size(w, h), CV_8UC3);
+    }
+
     cv::Mat mat = frame.getMat();
     return g_VI.getPixels(m_index, mat.ptr(), false, true );
 }
--- a/modules/videoio/src/cap_gphoto2.cpp
+++ b/modules/videoio/src/cap_gphoto2.cpp
@ -70,7 +70,7 @@ public:
        return gp_result_as_string(result);
    }
    friend std::ostream & operator<<(std::ostream & ostream,
-            GPhoto2Exception & e)
+            const GPhoto2Exception & e)
    {
        return ostream << e.method << ": " << e.what();
    }
@ -336,7 +336,7 @@ void DigitalCameraCapture::initContext()
        CR(gp_camera_autodetect(allDevices, context));
        CR(numDevices = gp_list_count(allDevices));
    }
-    catch (GPhoto2Exception & e)
+    catch (const GPhoto2Exception & e)
    {
        numDevices = 0;
    }
@ -389,7 +389,7 @@ DigitalCameraCapture::~DigitalCameraCapture()
        gp_context_unref(context);
        context = NULL;
    }
-    catch (GPhoto2Exception & e)
+    catch (const GPhoto2Exception & e)
    {
        message(ERROR, "destruction error", e);
    }
@ -442,7 +442,7 @@ bool DigitalCameraCapture::open(int index)
        opened = true;
        return true;
    }
-    catch (GPhoto2Exception & e)
+    catch (const GPhoto2Exception & e)
    {
        message(WARNING, "opening device failed", e);
        return false;
@ -491,7 +491,7 @@ void DigitalCameraCapture::close()
            rootWidget = NULL;
        }
    }
-    catch (GPhoto2Exception & e)
+    catch (const GPhoto2Exception & e)
    {
        message(ERROR, "cannot close device properly", e);
    }
@ -664,7 +664,7 @@ double DigitalCameraCapture::getProperty(int propertyId) const
            }
        }
    }
-    catch (GPhoto2Exception & e)
+    catch (const GPhoto2Exception & e)
    {
        char buf[128] = "";
        sprintf(buf, "cannot get property: %d", propertyId);
@ -807,7 +807,7 @@ bool DigitalCameraCapture::setProperty(int propertyId, double value)
            CR(gp_widget_set_changed(widget, 0));
        }
    }
-    catch (GPhoto2Exception & e)
+    catch (const GPhoto2Exception & e)
    {
        char buf[128] = "";
        sprintf(buf, "cannot set property: %d to %f", propertyId, value);
@ -849,7 +849,7 @@ bool DigitalCameraCapture::grabFrame()
        capturedFrames++;
        grabbedFrames.push_back(file);
    }
-    catch (GPhoto2Exception & e)
+    catch (const GPhoto2Exception & e)
    {
        if (file)
            gp_file_unref(file);
@ -873,7 +873,7 @@ bool DigitalCameraCapture::retrieveFrame(int, OutputArray outputFrame)
            readFrameFromFile(file, outputFrame);
            CR(gp_file_unref(file));
        }
-        catch (GPhoto2Exception & e)
+        catch (const GPhoto2Exception & e)
        {
            message(WARNING, "cannot read file grabbed from device", e);
            return false;
@ -914,7 +914,7 @@ int DigitalCameraCapture::findDevice(const char * deviceName) const
            }
        }
    }
-    catch (GPhoto2Exception & e)
+    catch (const GPhoto2Exception & e)
    {
        ; // pass
    }
@ -980,7 +980,7 @@ CameraWidget * DigitalCameraCapture::findWidgetByName(
            }
            return (it != end) ? it->second : NULL;
        }
-        catch (GPhoto2Exception & e)
+        catch (const GPhoto2Exception & e)
        {
            message(WARNING, "error while searching for widget", e);
        }
--- a/modules/videoio/src/cap_v4l.cpp
+++ b/modules/videoio/src/cap_v4l.cpp
--- a/modules/videoio/test/test_camera.cpp
+++ b/modules/videoio/test/test_camera.cpp
@ -11,16 +11,8 @@

 namespace opencv_test { namespace {

-TEST(DISABLED_VideoIO_Camera, basic)
+static void test_readFrames(/*const*/ VideoCapture& capture, const int N = 100)
 {
-    VideoCapture capture(0);
-    ASSERT_TRUE(capture.isOpened());
-    std::cout << "Camera 0 via " << capture.getBackendName() << " backend" << std::endl;
-    std::cout << "Frame width: " << capture.get(CAP_PROP_FRAME_WIDTH) << std::endl;
-    std::cout << "     height: " << capture.get(CAP_PROP_FRAME_HEIGHT) << std::endl;
-    std::cout << "Capturing FPS: " << capture.get(CAP_PROP_FPS) << std::endl;
-
-    const int N = 100;
    Mat frame;
    int64 time0 = cv::getTickCount();
    for (int i = 0; i < N; i++)
@ -34,36 +26,46 @@ TEST(DISABLED_VideoIO_Camera, basic)
    }
    int64 time1 = cv::getTickCount();
    printf("Processed %d frames on %.2f FPS\n", N, (N * cv::getTickFrequency()) / (time1 - time0 + 1));
-
-    capture.release();
 }

-//Following test if for capture device using PhysConn_Video_SerialDigital as crossbar input pin
-TEST(DISABLED_VideoIO_Camera, dshow_avermedia_capture)
+TEST(DISABLED_VideoIO_Camera, basic)
 {
    VideoCapture capture(0);
    ASSERT_TRUE(capture.isOpened());
-    capture.set(CAP_CROSSBAR_INPIN_TYPE, 6);
    std::cout << "Camera 0 via " << capture.getBackendName() << " backend" << std::endl;
    std::cout << "Frame width: " << capture.get(CAP_PROP_FRAME_WIDTH) << std::endl;
    std::cout << "     height: " << capture.get(CAP_PROP_FRAME_HEIGHT) << std::endl;
    std::cout << "Capturing FPS: " << capture.get(CAP_PROP_FPS) << std::endl;
+    test_readFrames(capture);
+    capture.release();
+}

-    const int N = 100;
-    Mat frame;
-    int64 time0 = cv::getTickCount();
-    for (int i = 0; i < N; i++)
+TEST(DISABLED_VideoIO_Camera, validate_V4L2_MJPEG)
 {
-        SCOPED_TRACE(cv::format("frame=%d", i));
-
-        capture >> frame;
-        ASSERT_FALSE(frame.empty());
-
-        EXPECT_GT(cvtest::norm(frame, NORM_INF), 0) << "Complete black image has been received";
+    VideoCapture capture(CAP_V4L2);
+    ASSERT_TRUE(capture.isOpened());
+    ASSERT_TRUE(capture.set(CAP_PROP_FOURCC, VideoWriter::fourcc('M', 'J', 'P', 'G')));
+    std::cout << "Camera 0 via " << capture.getBackendName() << " backend" << std::endl;
+    std::cout << "Frame width: " << capture.get(CAP_PROP_FRAME_WIDTH) << std::endl;
+    std::cout << "     height: " << capture.get(CAP_PROP_FRAME_HEIGHT) << std::endl;
+    std::cout << "Capturing FPS: " << capture.get(CAP_PROP_FPS) << std::endl;
+    int fourcc = (int)capture.get(CAP_PROP_FOURCC);
+    std::cout << "FOURCC code: " << cv::format("0x%08x", fourcc) << std::endl; // zero-pad to 8 hex digits
+    test_readFrames(capture);
+    capture.release();
 }
-    int64 time1 = cv::getTickCount();
-    printf("Processed %d frames on %.2f FPS\n", N, (N * cv::getTickFrequency()) / (time1 - time0 + 1));

+// The following test is for a capture device using PhysConn_Video_SerialDigital as the crossbar input pin
+TEST(DISABLED_VideoIO_Camera, dshow_avermedia_capture)
+{
+    VideoCapture capture(0);
+    ASSERT_TRUE(capture.isOpened());
+    capture.set(CAP_PROP_CHANNEL, 6);
+    std::cout << "Camera 0 via " << capture.getBackendName() << " backend" << std::endl;
+    std::cout << "Frame width: " << capture.get(CAP_PROP_FRAME_WIDTH) << std::endl;
+    std::cout << "     height: " << capture.get(CAP_PROP_FRAME_HEIGHT) << std::endl;
+    std::cout << "Capturing FPS: " << capture.get(CAP_PROP_FPS) << std::endl;
+    test_readFrames(capture);
    capture.release();
 }

--- a/samples/android/face-detection/jni/DetectionBasedTracker_jni.cpp
+++ b/samples/android/face-detection/jni/DetectionBasedTracker_jni.cpp
@ -88,7 +88,7 @@ JNIEXPORT jlong JNICALL Java_org_opencv_samples_facedetect_DetectionBasedTracker
            //trackingDetector->setMinObjectSize(Size(faceSize, faceSize));
        }
    }
-    catch(cv::Exception& e)
+    catch(const cv::Exception& e)
    {
        LOGD("nativeCreateObject caught cv::Exception: %s", e.what());
        jclass je = jenv->FindClass("org/opencv/core/CvException");
@ -121,7 +121,7 @@ JNIEXPORT void JNICALL Java_org_opencv_samples_facedetect_DetectionBasedTracker_
            delete (DetectorAgregator*)thiz;
        }
    }
-    catch(cv::Exception& e)
+    catch(const cv::Exception& e)
    {
        LOGD("nativeestroyObject caught cv::Exception: %s", e.what());
        jclass je = jenv->FindClass("org/opencv/core/CvException");
@ -147,7 +147,7 @@ JNIEXPORT void JNICALL Java_org_opencv_samples_facedetect_DetectionBasedTracker_
    {
        ((DetectorAgregator*)thiz)->tracker->run();
    }
-    catch(cv::Exception& e)
+    catch(const cv::Exception& e)
    {
        LOGD("nativeStart caught cv::Exception: %s", e.what());
        jclass je = jenv->FindClass("org/opencv/core/CvException");
@ -173,7 +173,7 @@ JNIEXPORT void JNICALL Java_org_opencv_samples_facedetect_DetectionBasedTracker_
    {
        ((DetectorAgregator*)thiz)->tracker->stop();
    }
-    catch(cv::Exception& e)
+    catch(const cv::Exception& e)
    {
        LOGD("nativeStop caught cv::Exception: %s", e.what());
        jclass je = jenv->FindClass("org/opencv/core/CvException");
@ -203,7 +203,7 @@ JNIEXPORT void JNICALL Java_org_opencv_samples_facedetect_DetectionBasedTracker_
            //((DetectorAgregator*)thiz)->trackingDetector->setMinObjectSize(Size(faceSize, faceSize));
        }
    }
-    catch(cv::Exception& e)
+    catch(const cv::Exception& e)
    {
        LOGD("nativeStop caught cv::Exception: %s", e.what());
        jclass je = jenv->FindClass("org/opencv/core/CvException");
@ -233,7 +233,7 @@ JNIEXPORT void JNICALL Java_org_opencv_samples_facedetect_DetectionBasedTracker_
        ((DetectorAgregator*)thiz)->tracker->getObjects(RectFaces);
        *((Mat*)faces) = Mat(RectFaces, true);
    }
-    catch(cv::Exception& e)
+    catch(const cv::Exception& e)
    {
        LOGD("nativeCreateObject caught cv::Exception: %s", e.what());
        jclass je = jenv->FindClass("org/opencv/core/CvException");
--- a/samples/android/tutorial-4-opencl/jni/CLprocessor.cpp
+++ b/samples/android/tutorial-4-opencl/jni/CLprocessor.cpp
@ -63,11 +63,11 @@ void dumpCLinfo()
                  i, name.c_str(), (type==CL_DEVICE_TYPE_GPU ? "GPU" : "CPU"), extensions.c_str() );
        }
    }
-    catch(cl::Error& e)
+    catch(const cl::Error& e)
    {
        LOGE( "OpenCL info: error while gathering OpenCL info: %s (%d)", e.what(), e.err() );
    }
-    catch(std::exception& e)
+    catch(const std::exception& e)
    {
        LOGE( "OpenCL info: error while gathering OpenCL info: %s", e.what() );
    }
@ -130,11 +130,11 @@ extern "C" void initCL()
            LOGE("Can't init OpenCV with OpenCL TAPI");
        haveOpenCL = true;
    }
-    catch(cl::Error& e)
+    catch(const cl::Error& e)
    {
        LOGE("cl::Error: %s (%d)", e.what(), e.err());
    }
-    catch(std::exception& e)
+    catch(const std::exception& e)
    {
        LOGE("std::exception: %s", e.what());
    }
--- a/samples/cpp/detect_blob.cpp
+++ b/samples/cpp/detect_blob.cpp
@ -192,7 +192,7 @@ int main(int argc, char *argv[])
            imshow("Original", img);
            waitKey();
        }
-        catch (Exception& e)
+        catch (const Exception& e)
        {
            cout << "Feature : " << *itDesc << "\n";
            cout << e.msg << endl;
--- a/samples/cpp/detect_mser.cpp
+++ b/samples/cpp/detect_mser.cpp
@ -523,7 +523,7 @@ int main(int argc, char *argv[])
            imshow(winName, result);
            imshow("Original", img);
        }
-        catch (Exception& e)
+        catch (const Exception& e)
        {
            cout << "Feature: " << *itDesc << "\n";
            cout << e.msg << endl;
--- a/samples/cpp/live_detect_qrcode.cpp
+++ b/samples/cpp/live_detect_qrcode.cpp
@ -177,7 +177,7 @@ int showImageQRCodeDetect(string in, string out)
        {
            imwrite(out, color_src, compression_params);
        }
-        catch (cv::Exception& ex)
+        catch (const cv::Exception& ex)
        {
            cout << "Exception converting image to PNG format: ";
            cout << ex.what() << '\n';
--- a/samples/cpp/matchmethod_orb_akaze_brisk.cpp
+++ b/samples/cpp/matchmethod_orb_akaze_brisk.cpp
@ -147,7 +147,7 @@ int main(int argc, char *argv[])
                    desMethCmp.push_back(cumSumDist2);
                    waitKey();
                }
-                catch (Exception& e)
+                catch (const Exception& e)
                {
                    cout << e.msg << endl;
                    cout << "Cumulative distance cannot be computed." << endl;
@ -155,7 +155,7 @@ int main(int argc, char *argv[])
                }
            }
        }
-        catch (Exception& e)
+        catch (const Exception& e)
        {
            cout << "Feature : " << *itDesc << "\n";
            if (itMatcher != typeAlgoMatch.end())
--- a/samples/cpp/pca.cpp
+++ b/samples/cpp/pca.cpp
@ -141,7 +141,7 @@ int main(int argc, char** argv)
    // Read in the data. This can fail if not valid
    try {
        read_imgList(imgList, images);
-    } catch (cv::Exception& e) {
+    } catch (const cv::Exception& e) {
        cerr << "Error opening file \"" << imgList << "\". Reason: " << e.msg << endl;
        exit(1);
    }
--- a/samples/directx/d3d10_interop.cpp
+++ b/samples/directx/d3d10_interop.cpp
@ -260,7 +260,7 @@ public:
            }
        } // try

-        catch (cv::Exception& e)
+        catch (const cv::Exception& e)
        {
            std::cerr << "Exception: " << e.what() << std::endl;
            return 10;
--- a/samples/directx/d3d11_interop.cpp
+++ b/samples/directx/d3d11_interop.cpp
@ -378,7 +378,7 @@ public:
            }
        } // try

-        catch (cv::Exception& e)
+        catch (const cv::Exception& e)
        {
            std::cerr << "Exception: " << e.what() << std::endl;
            cleanup();
--- a/samples/directx/d3d9_interop.cpp
+++ b/samples/directx/d3d9_interop.cpp
@ -225,7 +225,7 @@ public:
            }
        }  // try

-        catch (cv::Exception& e)
+        catch (const cv::Exception& e)
        {
            std::cerr << "Exception: " << e.what() << std::endl;
            return 10;
--- a/samples/directx/d3d9ex_interop.cpp
+++ b/samples/directx/d3d9ex_interop.cpp
@ -226,7 +226,7 @@ public:

        } // try

-        catch (cv::Exception& e)
+        catch (const cv::Exception& e)
        {
            std::cerr << "Exception: " << e.what() << std::endl;
            return 10;
--- a/samples/directx/d3dsample.hpp
+++ b/samples/directx/d3dsample.hpp
@ -158,7 +158,7 @@ int d3d_app(int argc, char** argv, std::string& title)
        return app.run();
    }

-    catch (cv::Exception& e)
+    catch (const cv::Exception& e)
    {
        std::cerr << "Exception: " << e.what() << std::endl;
        return 10;
--- a/samples/dnn/tf_text_graph_faster_rcnn.py
+++ b/samples/dnn/tf_text_graph_faster_rcnn.py
@ -32,6 +32,8 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):
    width_stride = float(grid_anchor_generator['width_stride'][0])
    height_stride = float(grid_anchor_generator['height_stride'][0])
    features_stride = float(config['feature_extractor'][0]['first_stage_features_stride'][0])
+    first_stage_nms_iou_threshold = float(config['first_stage_nms_iou_threshold'][0])
+    first_stage_max_proposals = int(config['first_stage_max_proposals'][0])

    print('Number of classes: %d' % num_classes)
    print('Scales:            %s' % str(scales))
@ -47,7 +49,8 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):
    removeIdentity(graph_def)

    def to_remove(name, op):
-        return name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep)
+        return name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep) or \
+               (name.startswith('CropAndResize') and op != 'CropAndResize')

    removeUnusedNodesAndAttrs(to_remove, graph_def)

@ -114,10 +117,10 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):
    detectionOut.addAttr('num_classes', 2)
    detectionOut.addAttr('share_location', True)
    detectionOut.addAttr('background_label_id', 0)
-    detectionOut.addAttr('nms_threshold', 0.7)
+    detectionOut.addAttr('nms_threshold', first_stage_nms_iou_threshold)
    detectionOut.addAttr('top_k', 6000)
    detectionOut.addAttr('code_type', "CENTER_SIZE")
-    detectionOut.addAttr('keep_top_k', 100)
+    detectionOut.addAttr('keep_top_k', first_stage_max_proposals)
    detectionOut.addAttr('clip', False)

    graph_def.node.extend([detectionOut])
@ -147,9 +150,11 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):
              'SecondStageBoxPredictor/Reshape_1/Reshape', [1, -1], graph_def)

    # Replace Flatten subgraph onto a single node.
+    cropAndResizeNodeName = ''
    for i in reversed(range(len(graph_def.node))):
        if graph_def.node[i].op == 'CropAndResize':
            graph_def.node[i].input.insert(1, 'detection_out/clip_by_value')
+            cropAndResizeNodeName = graph_def.node[i].name

        if graph_def.node[i].name == 'SecondStageBoxPredictor/Reshape':
            addConstNode('SecondStageBoxPredictor/Reshape/shape2', [1, -1, 4], graph_def)
@ -159,11 +164,15 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):

        if graph_def.node[i].name in ['SecondStageBoxPredictor/Flatten/flatten/Shape',
                                      'SecondStageBoxPredictor/Flatten/flatten/strided_slice',
-                                      'SecondStageBoxPredictor/Flatten/flatten/Reshape/shape']:
+                                      'SecondStageBoxPredictor/Flatten/flatten/Reshape/shape',
+                                      'SecondStageBoxPredictor/Flatten_1/flatten/Shape',
+                                      'SecondStageBoxPredictor/Flatten_1/flatten/strided_slice',
+                                      'SecondStageBoxPredictor/Flatten_1/flatten/Reshape/shape']:
            del graph_def.node[i]

    for node in graph_def.node:
-        if node.name == 'SecondStageBoxPredictor/Flatten/flatten/Reshape':
+        if node.name == 'SecondStageBoxPredictor/Flatten/flatten/Reshape' or \
+           node.name == 'SecondStageBoxPredictor/Flatten_1/flatten/Reshape':
            node.op = 'Flatten'
            node.input.pop()

@ -171,6 +180,11 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):
                         'SecondStageBoxPredictor/BoxEncodingPredictor/MatMul']:
            node.addAttr('loc_pred_transposed', True)

+        if node.name.startswith('MaxPool2D'):
+            assert(node.op == 'MaxPool')
+            assert(cropAndResizeNodeName)
+            node.input = [cropAndResizeNodeName]
+
    ################################################################################
    ### Postprocessing
    ################################################################################
--- a/samples/opencl/opencl-opencv-interop.cpp
+++ b/samples/opencl/opencl-opencv-interop.cpp
@ -676,7 +676,7 @ int App::initVideoSource()
            throw std::runtime_error(std::string("specify video source"));
    }

-    catch (std::exception e)
+    catch (const std::exception& e) // by reference: a by-value catch slices runtime_error and drops its what() text
    {
        cerr << "ERROR: " << e.what() << std::endl;
        return -1;
--- a/samples/opengl/opengl_interop.cpp
+++ b/samples/opengl/opengl_interop.cpp
@ -325,7 +325,7 @@ public:
        }


-        catch (cv::Exception& e)
+        catch (const cv::Exception& e)
        {
            std::cerr << "Exception: " << e.what() << std::endl;
            return 10;
@ -520,7 +520,7 @@ int main(int argc, char** argv)
        app.create();
        return app.run();
    }
-    catch (cv::Exception& e)
+    catch (const cv::Exception& e)
    {
        cerr << "Exception: " << e.what() << endl;
        return 10;
--- a/samples/python/digits_video.py
+++ b/samples/python/digits_video.py
@ -86,7 +86,7 @@ def main():
                frame[y:,x+w:][:SZ, :SZ] = bin_norm[...,np.newaxis]

            sample = preprocess_hog([bin_norm])
-            digit = model.predict(sample)[0]
+            digit = model.predict(sample)[1].ravel()
            cv.putText(frame, '%d'%digit, (x, y), cv.FONT_HERSHEY_PLAIN, 1.0, (200, 0, 0), thickness = 1)


--- a/samples/va_intel/va_intel_interop.cpp
+++ b/samples/va_intel/va_intel_interop.cpp
@ -256,7 +256,7 @@ int main(int argc, char** argv)

        std::cout << "Interop " << (doInterop ? "ON " : "OFF") << ": processing time, msec: " << time << std::endl;
    }
-    catch (std::exception& ex)
+    catch (const std::exception& ex)
    {
        std::cerr << "ERROR: " << ex.what() << std::endl;
    }