Merge remote-tracking branch 'upstream/master' into gtk3

Conflicts: CMakeLists.txt
11 years ago · d60be58a92
parent 7daec9e9a8 5600bc54f4
commit d60be58a92
129 changed files with 2926 additions and 1594 deletions
--- a/3rdparty/jinja2/markupsafe/init.py
+++ b/3rdparty/jinja2/markupsafe/init.py
@ -9,7 +9,7 @@
    :license: BSD, see LICENSE for more details.
 """
 import re
-from _compat import text_type, string_types, int_types, \
+from ._compat import text_type, string_types, int_types, \
     unichr, PY2


@ -227,7 +227,7 @@ class _MarkupEscapeHelper(object):
 try:
    from _speedups import escape, escape_silent, soft_unicode
 except ImportError:
-    from _native import escape, escape_silent, soft_unicode
+    from ._native import escape, escape_silent, soft_unicode

 if not PY2:
    soft_str = soft_unicode
--- a/3rdparty/jinja2/markupsafe/_native.py
+++ b/3rdparty/jinja2/markupsafe/_native.py
@ -8,7 +8,7 @@
    :copyright: (c) 2010 by Armin Ronacher.
    :license: BSD, see LICENSE for more details.
 """
-from _compat import text_type
+from ._compat import text_type


 def escape(s):
--- a/3rdparty/jinja2/utils.py
+++ b/3rdparty/jinja2/utils.py
@ -517,4 +517,4 @@ class Joiner(object):


 # Imported here because that's where it was in the past
-from markupsafe import Markup, escape, soft_unicode
+from .markupsafe import Markup, escape, soft_unicode
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -128,7 +128,8 @@ OCV_OPTION(WITH_GSTREAMER      "Include Gstreamer support"                   ON
 OCV_OPTION(WITH_GSTREAMER_0_10 "Enable Gstreamer 0.10 support (instead of 1.x)"                              OFF )
 OCV_OPTION(WITH_GTK            "Include GTK support"                         ON   IF (UNIX AND NOT APPLE AND NOT ANDROID) )
 OCV_OPTION(WITH_GTK_2_X        "Use GTK version 2"                           OFF  IF (UNIX AND NOT APPLE AND NOT ANDROID) )
-OCV_OPTION(WITH_IPP            "Include Intel IPP support"                   OFF  IF (MSVC OR X86 OR X86_64) )
+OCV_OPTION(WITH_ICV            "Include Intel IPP ICV support"               ON   IF (NOT IOS) )
+OCV_OPTION(WITH_IPP            "Include Intel IPP support"                   OFF  IF (NOT IOS) )
 OCV_OPTION(WITH_JASPER         "Include JPEG2K support"                      ON   IF (NOT IOS) )
 OCV_OPTION(WITH_JPEG           "Include JPEG support"                        ON)
 OCV_OPTION(WITH_WEBP           "Include WebP support"                        ON   IF (NOT IOS) )
@ -748,15 +749,7 @@ else()
        status("    Cocoa:"  YES)
      endif()
    else()
-      if(HAVE_GTK3)
-        status("    GTK+ 3.x:" HAVE_GTK     THEN "YES (ver ${ALIASOF_gtk+-3.0_VERSION})"     ELSE NO)
-      elseif(HAVE_GTK)
-        status("    GTK+ 2.x:" HAVE_GTK      THEN "YES (ver ${ALIASOF_gtk+-2.0_VERSION})"     ELSE NO)
-      else()
-        if(DEFINED WITH_GTK)
-          staus("    GTK+:" NO)
-        endif()
-      endif()
+      status("    GTK+ 2.x:" HAVE_GTK      THEN "YES (ver ${ALIASOF_gtk+-2.0_VERSION})"     ELSE NO)
      status("    GThread :" HAVE_GTHREAD  THEN "YES (ver ${ALIASOF_gthread-2.0_VERSION})"  ELSE NO)
      status("    GtkGlExt:" HAVE_GTKGLEXT THEN "YES (ver ${ALIASOF_gtkglext-1.0_VERSION})" ELSE NO)
    endif()
--- a/cmake/OpenCVFindIPP.cmake
+++ b/cmake/OpenCVFindIPP.cmake
@ -2,35 +2,41 @@
 # The script to detect Intel(R) Integrated Performance Primitives (IPP)
 # installation/package
 #
-# This will try to find Intel IPP libraries, and include path by automatic
-# search through typical install locations and if failed it will
-# examine IPPROOT environment variable.
-# Note, IPPROOT is not set by IPP installer, it should be set manually.
+# Windows host:
+# Run script like this before cmake:
+#   call "<IPP_INSTALL_DIR>\bin\ippvars.bat" intel64
+# for example:
+#   call "C:\Program Files (x86)\Intel\Composer XE\ipp\bin\ippvars.bat" intel64
+#
+# Linux host:
+# Run script like this before cmake:
+#   source /opt/intel/ipp/bin/ippvars.sh [ia32|intel64]
 #
 # On return this will define:
 #
-# IPP_FOUND        - True if Intel IPP found
-# IPP_ROOT_DIR     - root of IPP installation
-# IPP_INCLUDE_DIRS - IPP include folder
-# IPP_LIBRARY_DIRS - IPP libraries folder
-# IPP_LIBRARIES    - IPP libraries names that are used by OpenCV
-# IPP_LATEST_VERSION_STR   - string with the newest detected IPP version
-# IPP_LATEST_VERSION_MAJOR - numbers of IPP version (MAJOR.MINOR.BUILD)
-# IPP_LATEST_VERSION_MINOR
-# IPP_LATEST_VERSION_BUILD
+# HAVE_IPP          - True if Intel IPP found
+# HAVE_IPP_ICV_ONLY - True if Intel IPP ICV version is available
+# IPP_ROOT_DIR      - root of IPP installation
+# IPP_INCLUDE_DIRS  - IPP include folder
+# IPP_LIBRARIES     - IPP libraries that are used by OpenCV
+# IPP_VERSION_STR   - string with the newest detected IPP version
+# IPP_VERSION_MAJOR - numbers of IPP version (MAJOR.MINOR.BUILD)
+# IPP_VERSION_MINOR
+# IPP_VERSION_BUILD
 #
 # Created: 30 Dec 2010 by Vladimir Dudnik (vladimir.dudnik@intel.com)
 #

-set(IPP_FOUND)
-set(IPP_VERSION_STR "5.3.0.0") # will not detect earlier versions
-set(IPP_VERSION_MAJOR 0)
-set(IPP_VERSION_MINOR 0)
-set(IPP_VERSION_BUILD 0)
-set(IPP_ROOT_DIR)
-set(IPP_INCLUDE_DIRS)
-set(IPP_LIBRARY_DIRS)
-set(IPP_LIBRARIES)
+unset(HAVE_IPP CACHE)
+unset(HAVE_IPP_ICV_ONLY)
+unset(IPP_ROOT_DIR)
+unset(IPP_INCLUDE_DIRS)
+unset(IPP_LIBRARIES)
+unset(IPP_VERSION_STR)
+unset(IPP_VERSION_MAJOR)
+unset(IPP_VERSION_MINOR)
+unset(IPP_VERSION_BUILD)
+
 set(IPP_LIB_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX})
 set(IPP_LIB_SUFFIX  ${CMAKE_STATIC_LIBRARY_SUFFIX})
 set(IPP_PREFIX "ipp")
@ -42,322 +48,184 @@ set(IPPCC      "cc")   # color conversion
 set(IPPCV      "cv")   # computer vision
 set(IPPVM      "vm")   # vector math

-
 set(IPP_X64 0)
-if (CMAKE_CXX_SIZEOF_DATA_PTR EQUAL 8)
+if(CMAKE_CXX_SIZEOF_DATA_PTR EQUAL 8)
    set(IPP_X64 1)
 endif()
-if (CMAKE_CL_64)
+if(CMAKE_CL_64)
    set(IPP_X64 1)
 endif()

-# ------------------------------------------------------------------------
-# This function detect IPP version by analyzing ippversion.h file
-# Note, ippversion.h file was inroduced since IPP 5.3
-# ------------------------------------------------------------------------
-function(get_ipp_version _ROOT_DIR)
-    set(_VERSION_STR)
-    set(_MAJOR)
-    set(_MINOR)
-    set(_BUILD)
-
-    # read IPP version info from file
-    file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR1 REGEX "IPP_VERSION_MAJOR")
-    file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR2 REGEX "IPP_VERSION_MINOR")
-    file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR3 REGEX "IPP_VERSION_BUILD")
-    if("${STR3}" STREQUAL "")
-        file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR3 REGEX "IPP_VERSION_UPDATE")
-    endif()
-    file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR4 REGEX "IPP_VERSION_STR")
-
-    # extract info and assign to variables
-    string(REGEX MATCHALL "[0-9]+" _MAJOR ${STR1})
-    string(REGEX MATCHALL "[0-9]+" _MINOR ${STR2})
-    string(REGEX MATCHALL "[0-9]+" _BUILD ${STR3})
-    string(REGEX MATCHALL "[0-9]+[.]+[0-9]+[^\"]+|[0-9]+[.]+[0-9]+" _VERSION_STR ${STR4})
-
-    # export info to parent scope
-    set(IPP_VERSION_STR   ${_VERSION_STR} PARENT_SCOPE)
-    set(IPP_VERSION_MAJOR ${_MAJOR}       PARENT_SCOPE)
-    set(IPP_VERSION_MINOR ${_MINOR}       PARENT_SCOPE)
-    set(IPP_VERSION_BUILD ${_BUILD}       PARENT_SCOPE)
-
-    message(STATUS "found IPP: ${_MAJOR}.${_MINOR}.${_BUILD} [${_VERSION_STR}]")
-    message(STATUS "at: ${_ROOT_DIR}")
-
+# This macro detects the IPP version by analyzing ippversion.h under _ROOT_DIR
+macro(ipp_get_version _ROOT_DIR)
+  unset(_VERSION_STR)
+  unset(_MAJOR)
+  unset(_MINOR)
+  unset(_BUILD)
+
+  # read IPP version info from file (STR3 falls back to IPP_VERSION_UPDATE)
+  file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR1 REGEX "IPP_VERSION_MAJOR")
+  file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR2 REGEX "IPP_VERSION_MINOR")
+  file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR3 REGEX "IPP_VERSION_BUILD")
+  if("${STR3}" STREQUAL "")
+    file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR3 REGEX "IPP_VERSION_UPDATE")
+  endif()
+  file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR4 REGEX "IPP_VERSION_STR")
+
+  # extract info; inputs are quoted so an empty match is still a valid call
+  string(REGEX MATCHALL "[0-9]+" _MAJOR "${STR1}")
+  string(REGEX MATCHALL "[0-9]+" _MINOR "${STR2}")
+  string(REGEX MATCHALL "[0-9]+" _BUILD "${STR3}")
+  string(REGEX MATCHALL "[0-9]+[.]+[0-9]+[^\"]+|[0-9]+[.]+[0-9]+" _VERSION_STR "${STR4}")
+
+  # publish results; a macro runs in the caller's scope, so plain set() suffices
+  set(IPP_VERSION_STR   ${_VERSION_STR})
+  set(IPP_VERSION_MAJOR ${_MAJOR})
+  set(IPP_VERSION_MINOR ${_MINOR})
+  set(IPP_VERSION_BUILD ${_BUILD})
+
+  set(__msg)
+  if(EXISTS ${_ROOT_DIR}/include/ippicv.h)  # ippicv.h marks the ICV-only package
+    ocv_assert(WITH_ICV AND NOT WITH_IPP)
+    set(__msg " ICV version")
+    set(HAVE_IPP_ICV_ONLY 1)
+  endif()
+
+  message(STATUS "found IPP: ${_MAJOR}.${_MINOR}.${_BUILD} [${_VERSION_STR}]${__msg}")
+  message(STATUS "at: ${_ROOT_DIR}")
+endmacro()
+
+
+# This macro sets IPP_INCLUDE_DIRS and IPP_LIBRARIES variables
+macro(ipp_set_variables _LATEST_VERSION)
+  if(${_LATEST_VERSION} VERSION_LESS "7.0")
+    message(SEND_ERROR "IPP ${_LATEST_VERSION} is not supported")
+    unset(HAVE_IPP)
    return()
-
-endfunction()
-
-
-# ------------------------------------------------------------------------
-# This is auxiliary function called from set_ipp_variables()
-# to set IPP_LIBRARIES variable in IPP 6.x style (IPP 5.3 should also work)
-# ------------------------------------------------------------------------
-function(set_ipp_old_libraries)
-    set(IPP_PREFIX "ipp")
-    set(IPP_SUFFIX)            # old style static core libs suffix
-    set(IPP_ARCH)              # architecture suffix
-    set(IPP_DISP   "emerged")  # old style dipatcher and cpu-specific
-    set(IPP_MRGD   "merged")   #   static libraries
-    set(IPPCORE    "core")     # core functionality
-    set(IPPSP      "s")        # signal processing
-    set(IPPIP      "i")        # image processing
-    set(IPPCC      "cc")       # color conversion
-    set(IPPCV      "cv")       # computer vision
-    set(IPPVM      "vm")       # vector math
-
-    if (IPP_X64)
-        set(IPP_ARCH "em64t")
+  endif()
+
+  # set INCLUDE and LIB folders
+  set(IPP_INCLUDE_DIRS ${IPP_ROOT_DIR}/include)
+
+  if(NOT HAVE_IPP_ICV_ONLY)
+    if(APPLE)
+      set(IPP_LIBRARY_DIR ${IPP_ROOT_DIR}/lib)
+    elseif(IPP_X64)
+      if(NOT EXISTS ${IPP_ROOT_DIR}/lib/intel64)
+        message(SEND_ERROR "IPP EM64T libraries not found")
+      endif()
+      set(IPP_LIBRARY_DIR ${IPP_ROOT_DIR}/lib/intel64)
+    else()
+      if(NOT EXISTS ${IPP_ROOT_DIR}/lib/ia32)
+        message(SEND_ERROR "IPP IA32 libraries not found")
+      endif()
+      set(IPP_LIBRARY_DIR ${IPP_ROOT_DIR}/lib/ia32)
    endif()
-
-    if(WIN32)
-        set(IPP_SUFFIX "l")
+  else()
+    if(APPLE)
+      set(IPP_LIBRARY_DIR ${IPP_ROOT_DIR}/libs/macosx)
+    elseif(WIN32 AND NOT ARM)
+      set(IPP_LIBRARY_DIR ${IPP_ROOT_DIR}/libs/windows)
+    elseif(UNIX)
+      set(IPP_LIBRARY_DIR ${IPP_ROOT_DIR}/libs/linux)
+    else()
+      message(STATUS "IPP ${_LATEST_VERSION} at ${IPP_ROOT_DIR} is not supported")  # STATUS: "MESSAGE" is not a message() mode
+      unset(HAVE_IPP)
+      return()  # macros expand in place, so this returns from the caller's scope
+    endif()
-
-    set(IPP_LIBRARIES
-        ${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPVM}${IPP_MRGD}${IPP_ARCH}${IPP_LIB_SUFFIX}
-        ${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPVM}${IPP_DISP}${IPP_ARCH}${IPP_LIB_SUFFIX}
-        ${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPCC}${IPP_MRGD}${IPP_ARCH}${IPP_LIB_SUFFIX}
-        ${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPCC}${IPP_DISP}${IPP_ARCH}${IPP_LIB_SUFFIX}
-        ${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPCV}${IPP_MRGD}${IPP_ARCH}${IPP_LIB_SUFFIX}
-        ${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPCV}${IPP_DISP}${IPP_ARCH}${IPP_LIB_SUFFIX}
-        ${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPIP}${IPP_MRGD}${IPP_ARCH}${IPP_LIB_SUFFIX}
-        ${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPIP}${IPP_DISP}${IPP_ARCH}${IPP_LIB_SUFFIX}
-        ${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPSP}${IPP_MRGD}${IPP_ARCH}${IPP_LIB_SUFFIX}
-        ${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPSP}${IPP_DISP}${IPP_ARCH}${IPP_LIB_SUFFIX}
-        ${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPCORE}${IPP_ARCH}${IPP_SUFFIX}${IPP_LIB_SUFFIX}
-        PARENT_SCOPE)
-
-    return()
-
-endfunction()
-
-
-# ------------------------------------------------------------------------
-# This is auxiliary function called from set_ipp_variables()
-# to set IPP_LIBRARIES variable in IPP 7.x and 8.x style
-# ------------------------------------------------------------------------
-function(set_ipp_new_libraries _LATEST_VERSION)
-    set(IPP_PREFIX "ipp")
-
-    if(${_LATEST_VERSION} VERSION_LESS "8.0")
-        set(IPP_SUFFIX "_l")        # static not threaded libs suffix IPP 7.x
+    if(X86_64)
+      set(IPP_LIBRARY_DIR ${IPP_LIBRARY_DIR}/intel64)
    else()
-        if(WIN32)
-            set(IPP_SUFFIX "mt")    # static not threaded libs suffix IPP 8.x for Windows
-        else()
-            set(IPP_SUFFIX "")      # static not threaded libs suffix IPP 8.x for Linux/OS X
-        endif()
+      set(IPP_LIBRARY_DIR ${IPP_LIBRARY_DIR}/ia32)
    endif()
-    set(IPPCORE    "core")     # core functionality
-    set(IPPSP      "s")        # signal processing
-    set(IPPIP      "i")        # image processing
-    set(IPPCC      "cc")       # color conversion
-    set(IPPCV      "cv")       # computer vision
-    set(IPPVM      "vm")       # vector math
+  endif()

-    set(IPP_LIBRARIES
-        ${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPVM}${IPP_SUFFIX}${IPP_LIB_SUFFIX}
-        ${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPCC}${IPP_SUFFIX}${IPP_LIB_SUFFIX}
-        ${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPCV}${IPP_SUFFIX}${IPP_LIB_SUFFIX}
-        ${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPI}${IPP_SUFFIX}${IPP_LIB_SUFFIX}
-        ${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPS}${IPP_SUFFIX}${IPP_LIB_SUFFIX}
-        ${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPCORE}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
-
-    if (UNIX)
-        set(IPP_LIBRARIES
-            ${IPP_LIBRARIES}
-            ${IPP_LIB_PREFIX}irc${CMAKE_SHARED_LIBRARY_SUFFIX}
-            ${IPP_LIB_PREFIX}imf${CMAKE_SHARED_LIBRARY_SUFFIX}
-            ${IPP_LIB_PREFIX}svml${CMAKE_SHARED_LIBRARY_SUFFIX})
+  set(IPP_PREFIX "ipp")
+  if(${_LATEST_VERSION} VERSION_LESS "8.0")
+    set(IPP_SUFFIX "_l")        # static not threaded libs suffix IPP 7.x
+  else()
+    if(WIN32)
+      set(IPP_SUFFIX "mt")    # static not threaded libs suffix IPP 8.x for Windows
+    else()
+      set(IPP_SUFFIX "")      # static not threaded libs suffix IPP 8.x for Linux/OS X
    endif()
-    set(IPP_LIBRARIES ${IPP_LIBRARIES} PARENT_SCOPE)
-    return()
-
-endfunction()
-
-
-# ------------------------------------------------------------------------
-# This function will set
-# IPP_INCLUDE_DIRS, IPP_LIBRARY_DIRS and IPP_LIBRARIES variables depending
-# on IPP version parameter.
-# Since IPP 7.0 version library names and install folder structure
-# was changed
-# ------------------------------------------------------------------------
-function(set_ipp_variables _LATEST_VERSION)
-    if(${_LATEST_VERSION} VERSION_LESS "7.0")
-#        message(STATUS "old")
-
-        # set INCLUDE and LIB folders
-        set(IPP_INCLUDE_DIRS ${IPP_ROOT_DIR}/include PARENT_SCOPE)
-        set(IPP_LIBRARY_DIRS ${IPP_ROOT_DIR}/lib     PARENT_SCOPE)
-
-        if (IPP_X64)
-            if(NOT EXISTS ${IPP_ROOT_DIR}/../em64t)
-                message(SEND_ERROR "IPP EM64T libraries not found")
-            endif()
-        else()
-            if(NOT EXISTS ${IPP_ROOT_DIR}/../ia32)
-                message(SEND_ERROR "IPP IA32 libraries not found")
-            endif()
-        endif()
-
-        # set IPP_LIBRARIES variable (6.x lib names)
-        set_ipp_old_libraries()
-        set(IPP_LIBRARIES ${IPP_LIBRARIES} PARENT_SCOPE)
-        message(STATUS "IPP libs: ${IPP_LIBRARIES}")
-
+  endif()
+  set(IPPCORE "core")     # core functionality
+  set(IPPSP   "s")        # signal processing
+  set(IPPIP   "i")        # image processing
+  set(IPPCC   "cc")       # color conversion
+  set(IPPCV   "cv")       # computer vision
+  set(IPPVM   "vm")       # vector math
+
+  list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPVM}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
+  list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPCC}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
+  list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPCV}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
+  list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPI}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
+  list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPS}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
+  list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPCORE}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
+
+# FIXIT
+#  if(UNIX AND NOT HAVE_IPP_ICV_ONLY)
+#    get_filename_component(INTEL_COMPILER_LIBRARY_DIR ${IPP_ROOT_DIR}/../lib REALPATH)
+  if(UNIX)
+    if(NOT HAVE_IPP_ICV_ONLY)
+      get_filename_component(INTEL_COMPILER_LIBRARY_DIR ${IPP_ROOT_DIR}/../lib REALPATH)
    else()
-#        message(STATUS "new")
-
-        # set INCLUDE and LIB folders
-        set(IPP_INCLUDE_DIRS ${IPP_ROOT_DIR}/include PARENT_SCOPE)
-
-        if (APPLE)
-            set(IPP_LIBRARY_DIRS ${IPP_ROOT_DIR}/lib)
-        elseif (IPP_X64)
-            if(NOT EXISTS ${IPP_ROOT_DIR}/lib/intel64)
-                message(SEND_ERROR "IPP EM64T libraries not found")
-            endif()
-            set(IPP_LIBRARY_DIRS ${IPP_ROOT_DIR}/lib/intel64)
-        else()
-            if(NOT EXISTS ${IPP_ROOT_DIR}/lib/ia32)
-                message(SEND_ERROR "IPP IA32 libraries not found")
-            endif()
-            set(IPP_LIBRARY_DIRS ${IPP_ROOT_DIR}/lib/ia32)
-        endif()
-
-        if (UNIX)
-            get_filename_component(INTEL_COMPILER_LIBRARY_DIR ${IPP_ROOT_DIR}/../lib REALPATH)
-            if (IPP_X64)
-                if(NOT EXISTS ${INTEL_COMPILER_LIBRARY_DIR}/intel64)
-                    message(SEND_ERROR "Intel compiler EM64T libraries not found")
-                endif()
-                set(IPP_LIBRARY_DIRS
-                    ${IPP_LIBRARY_DIRS}
-                    ${INTEL_COMPILER_LIBRARY_DIR}/intel64)
-            else()
-                if(NOT EXISTS ${INTEL_COMPILER_LIBRARY_DIR}/ia32)
-                    message(SEND_ERROR "Intel compiler IA32 libraries not found")
-                endif()
-                set(IPP_LIBRARY_DIRS
-                    ${IPP_LIBRARY_DIRS}
-                    ${INTEL_COMPILER_LIBRARY_DIR}/ia32)
-            endif()
-        endif()
-        set(IPP_LIBRARY_DIRS ${IPP_LIBRARY_DIRS} PARENT_SCOPE)
-
-        # set IPP_LIBRARIES variable (7.x or 8.x lib names)
-        set_ipp_new_libraries(${_LATEST_VERSION})
-        set(IPP_LIBRARIES ${IPP_LIBRARIES} PARENT_SCOPE)
-        message(STATUS "IPP libs: ${IPP_LIBRARIES}")
-
+      set(INTEL_COMPILER_LIBRARY_DIR "/opt/intel/lib")
    endif()
-
-    return()
-
-endfunction()
+    if(IPP_X64)
+      if(NOT EXISTS ${INTEL_COMPILER_LIBRARY_DIR}/intel64)
+        message(SEND_ERROR "Intel compiler EM64T libraries not found")
+      endif()
+      set(INTEL_COMPILER_LIBRARY_DIR ${INTEL_COMPILER_LIBRARY_DIR}/intel64)
+    else()
+      if(NOT EXISTS ${INTEL_COMPILER_LIBRARY_DIR}/ia32)
+        message(SEND_ERROR "Intel compiler IA32 libraries not found")
+      endif()
+      set(INTEL_COMPILER_LIBRARY_DIR ${INTEL_COMPILER_LIBRARY_DIR}/ia32)
+    endif()
+    list(APPEND IPP_LIBRARIES ${INTEL_COMPILER_LIBRARY_DIR}/${IPP_LIB_PREFIX}irc${CMAKE_SHARED_LIBRARY_SUFFIX})
+    list(APPEND IPP_LIBRARIES ${INTEL_COMPILER_LIBRARY_DIR}/${IPP_LIB_PREFIX}imf${CMAKE_SHARED_LIBRARY_SUFFIX})
+    list(APPEND IPP_LIBRARIES ${INTEL_COMPILER_LIBRARY_DIR}/${IPP_LIB_PREFIX}svml${CMAKE_SHARED_LIBRARY_SUFFIX})
+  endif()
+
+  #message(STATUS "IPP libs: ${IPP_LIBRARIES}")
+endmacro()
+
+if(WITH_IPP)
+  set(IPPPATH $ENV{IPPROOT})
+  if(UNIX)
+    list(APPEND IPPPATH /opt/intel/ipp)
+  endif()
+elseif(WITH_ICV)
+  if(DEFINED ENV{IPPICVROOT})
+    set(IPPPATH $ENV{IPPICVROOT})
+  else()
+    set(IPPPATH ${OpenCV_SOURCE_DIR}/3rdparty/ippicv)
+  endif()
+endif()


-# ------------------------------------------------------------------------
-# This section will look for IPP through IPPROOT env variable
-# Note, IPPROOT is not set by IPP installer, you may need to set it manually
-# ------------------------------------------------------------------------
 find_path(
    IPP_H_PATH
    NAMES ippversion.h
-    PATHS $ENV{IPPROOT}
+    PATHS ${IPPPATH}
    PATH_SUFFIXES include
    DOC "The path to Intel(R) IPP header files"
    NO_DEFAULT_PATH
    NO_CMAKE_PATH)

 if(IPP_H_PATH)
-    set(IPP_FOUND 1)
+    set(HAVE_IPP 1)

-    # traverse up to IPPROOT level
    get_filename_component(IPP_ROOT_DIR ${IPP_H_PATH} PATH)

-    # extract IPP version info
-    get_ipp_version(${IPP_ROOT_DIR})
-
-    # keep info in the same vars for auto search and search by IPPROOT
-    set(IPP_LATEST_VERSION_STR   ${IPP_VERSION_STR})
-    set(IPP_LATEST_VERSION_MAJOR ${IPP_VERSION_MAJOR})
-    set(IPP_LATEST_VERSION_MINOR ${IPP_VERSION_MINOR})
-    set(IPP_LATEST_VERSION_BUILD ${IPP_VERSION_BUILD})
-
-    # set IPP INCLUDE, LIB dirs and library names
-    set_ipp_variables(${IPP_LATEST_VERSION_STR})
+    ipp_get_version(${IPP_ROOT_DIR})
+    ipp_set_variables(${IPP_VERSION_STR})
 endif()


-if(NOT IPP_FOUND)
-    # reset var from previous search
-    set(IPP_H_PATH)
-
-
-    # ------------------------------------------------------------------------
-    # This section will look for IPP through system program folders
-    # Note, if several IPP installations found the newest version will be
-    # selected
-    # ------------------------------------------------------------------------
-    foreach(curdir ${CMAKE_SYSTEM_PREFIX_PATH})
-        set(curdir ${curdir}/intel)
-        file(TO_CMAKE_PATH ${curdir} CURDIR)
-
-        if(EXISTS ${curdir})
-            file(GLOB_RECURSE IPP_H_DIR ${curdir}/ippversion.h)
-
-            if(IPP_H_DIR)
-                set(IPP_FOUND 1)
-            endif()
-
-            # init IPP_LATEST_VERSION  version with oldest detectable version (5.3.0.0)
-            #   IPP prior 5.3 did not have ippversion.h file
-            set(IPP_LATEST_VERSION_STR ${IPP_VERSION_STR})
-
-            # look through all dirs where ippversion.h was found
-            foreach(item ${IPP_H_DIR})
-
-                # traverse up to IPPROOT level
-                get_filename_component(_FILE_PATH ${item} PATH)
-                get_filename_component(_ROOT_DIR ${_FILE_PATH} PATH)
-
-                # extract IPP version info
-                get_ipp_version(${_ROOT_DIR})
-
-                # remember the latest version (if many found)
-                if(${IPP_LATEST_VERSION_STR} VERSION_LESS ${IPP_VERSION_STR})
-                    set(IPP_LATEST_VERSION_STR   ${IPP_VERSION_STR})
-                    set(IPP_LATEST_VERSION_MAJOR ${IPP_VERSION_MAJOR})
-                    set(IPP_LATEST_VERSION_MINOR ${IPP_VERSION_MINOR})
-                    set(IPP_LATEST_VERSION_BUILD ${IPP_VERSION_BUILD})
-                    set(IPP_ROOT_DIR ${_ROOT_DIR})
-                endif()
-            endforeach()
-        endif()
-    endforeach()
-endif()
-
-if(IPP_FOUND)
-    # set IPP INCLUDE, LIB dirs and library names
-    set_ipp_variables(${IPP_LATEST_VERSION_STR})
-
-    # set CACHE variable IPP_H_PATH,
-    # path to IPP header files for the latest version
-    find_path(
-        IPP_H_PATH
-        NAMES ippversion.h
-        PATHS ${IPP_ROOT_DIR}
-        PATH_SUFFIXES include
-        DOC "The path to Intel(R) IPP header files"
-        NO_DEFAULT_PATH
-        NO_CMAKE_PATH)
-endif()
-
-if(WIN32 AND MINGW AND NOT IPP_LATEST_VERSION_MAJOR LESS 7)
+if(WIN32 AND MINGW AND NOT IPP_VERSION_MAJOR LESS 7)
    # Since IPP built with Microsoft compiler and /GS option
    # ======================================================
    # From Windows SDK 7.1
--- a/cmake/OpenCVFindIPPAsync.cmake
+++ b/cmake/OpenCVFindIPPAsync.cmake
@ -0,0 +1,45 @@
+# Locates the Intel IPP Async (preview) headers and library.
+#
+# Input:
+#   IPP_ASYNC_ROOT (environment) - root of IPP Async installation
+#
+# Main variables:
+#   IPP_A_INCLUDE_DIR - directory containing ipp_async_defs.h
+#   IPP_A_LIBRARIES   - full path of ipp_async_preview.lib
+#   HAVE_IPP_A        - TRUE if both were found, FALSE otherwise;
+#                       for conditional compilation OpenCV with/without IPP Async
+
+# Only the library sub-directory differs between architectures.
+if(X86_64)
+  set(__ipp_a_lib_suffix lib/intel64)
+else()
+  set(__ipp_a_lib_suffix lib/ia32)
+endif()
+
+# Header directory.
+find_path(
+  IPP_A_INCLUDE_DIR
+  NAMES ipp_async_defs.h
+  PATHS $ENV{IPP_ASYNC_ROOT}
+  PATH_SUFFIXES include
+  DOC "Path to Intel IPP Async interface headers")
+
+# Library file; find_file() yields the full file path, not a directory.
+find_file(
+  IPP_A_LIBRARIES
+  NAMES ipp_async_preview.lib
+  PATHS $ENV{IPP_ASYNC_ROOT}
+  PATH_SUFFIXES ${__ipp_a_lib_suffix}
+  DOC "Path to Intel IPP Async interface libraries")
+
+unset(__ipp_a_lib_suffix)
+
+if(IPP_A_INCLUDE_DIR AND IPP_A_LIBRARIES)
+  set(HAVE_IPP_A TRUE)
+else()
+  set(HAVE_IPP_A FALSE)
+  # The message names the inputs this script actually reads.
+  message(WARNING "Intel IPP Async was not found: set the IPP_ASYNC_ROOT environment variable to the IPP Async installation root, or set IPP_A_INCLUDE_DIR and IPP_A_LIBRARIES explicitly.")
+endif()
+
+mark_as_advanced(FORCE IPP_A_LIBRARIES IPP_A_INCLUDE_DIR)
--- a/cmake/OpenCVFindLibsPerf.cmake
+++ b/cmake/OpenCVFindLibsPerf.cmake
@ -8,16 +8,24 @@ if(WITH_TBB)
 endif(WITH_TBB)

 # --- IPP ---
-ocv_clear_vars(IPP_FOUND)
-if(WITH_IPP)
+if(WITH_IPP OR WITH_ICV)
  include("${OpenCV_SOURCE_DIR}/cmake/OpenCVFindIPP.cmake")
-  if(IPP_FOUND)
-    add_definitions(-DHAVE_IPP)
+  if(HAVE_IPP)
    ocv_include_directories(${IPP_INCLUDE_DIRS})
-    link_directories(${IPP_LIBRARY_DIRS})
-    set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${IPP_LIBRARIES})
+    list(APPEND OPENCV_LINKER_LIBS ${IPP_LIBRARIES})
  endif()
-endif(WITH_IPP)
+endif()
+
+# --- IPP Async ---
+if(WITH_IPP_A)
+  include("${OpenCV_SOURCE_DIR}/cmake/OpenCVFindIPPAsync.cmake")
+  if(IPP_A_INCLUDE_DIR AND IPP_A_LIBRARIES)
+    ocv_include_directories(${IPP_A_INCLUDE_DIR})
+    # IPP_A_LIBRARIES is the full path of the library file (find_file result),
+    # so it is linked directly; link_directories() would need a directory.
+    list(APPEND OPENCV_LINKER_LIBS ${IPP_A_LIBRARIES})
+  endif()
+endif()

 # --- CUDA ---
 if(WITH_CUDA)
--- a/cmake/templates/OpenCVConfig.cmake.in
+++ b/cmake/templates/OpenCVConfig.cmake.in
@ -213,7 +213,7 @@ foreach(__opttype OPT DBG)
  SET(OpenCV_EXTRA_LIBS_${__opttype} "")

  # CUDA
-  if(OpenCV_CUDA_VERSION AND (CMAKE_CROSSCOMPILING OR (WIN32 AND NOT OpenCV_SHARED)))
+  if(OpenCV_CUDA_VERSION)
    if(NOT CUDA_FOUND)
      find_package(CUDA ${OpenCV_CUDA_VERSION} EXACT REQUIRED)
    else()
@ -222,32 +222,41 @@ foreach(__opttype OPT DBG)
      endif()
    endif()

-    list(APPEND OpenCV_EXTRA_LIBS_${__opttype} ${CUDA_LIBRARIES})
+    set(OpenCV_CUDA_LIBS_ABSPATH ${CUDA_LIBRARIES})

    if(${CUDA_VERSION} VERSION_LESS "5.5")
-      list(APPEND OpenCV_EXTRA_LIBS_${__opttype} ${CUDA_npp_LIBRARY})
+      list(APPEND OpenCV_CUDA_LIBS_ABSPATH ${CUDA_npp_LIBRARY})
    else()
      find_cuda_helper_libs(nppc)
      find_cuda_helper_libs(nppi)
      find_cuda_helper_libs(npps)
-      list(APPEND OpenCV_EXTRA_LIBS_${__opttype} ${CUDA_nppc_LIBRARY} ${CUDA_nppi_LIBRARY} ${CUDA_npps_LIBRARY})
+      list(APPEND OpenCV_CUDA_LIBS_ABSPATH ${CUDA_nppc_LIBRARY} ${CUDA_nppi_LIBRARY} ${CUDA_npps_LIBRARY})
    endif()

    if(OpenCV_USE_CUBLAS)
-      list(APPEND OpenCV_EXTRA_LIBS_${__opttype} ${CUDA_CUBLAS_LIBRARIES})
+      list(APPEND OpenCV_CUDA_LIBS_ABSPATH ${CUDA_CUBLAS_LIBRARIES})
    endif()

    if(OpenCV_USE_CUFFT)
-      list(APPEND OpenCV_EXTRA_LIBS_${__opttype} ${CUDA_CUFFT_LIBRARIES})
+      list(APPEND OpenCV_CUDA_LIBS_ABSPATH ${CUDA_CUFFT_LIBRARIES})
    endif()

    if(OpenCV_USE_NVCUVID)
-      list(APPEND OpenCV_EXTRA_LIBS_${__opttype} ${CUDA_nvcuvid_LIBRARIES})
+      list(APPEND OpenCV_CUDA_LIBS_ABSPATH ${CUDA_nvcuvid_LIBRARIES})
    endif()

    if(WIN32)
-      list(APPEND OpenCV_EXTRA_LIBS_${__opttype} ${CUDA_nvcuvenc_LIBRARIES})
+      list(APPEND OpenCV_CUDA_LIBS_ABSPATH ${CUDA_nvcuvenc_LIBRARIES})
    endif()
+
+    set(OpenCV_CUDA_LIBS_RELPATH "")
+    foreach(l ${OpenCV_CUDA_LIBS_ABSPATH})
+      get_filename_component(_tmp ${l} PATH)
+      list(APPEND OpenCV_CUDA_LIBS_RELPATH ${_tmp})
+    endforeach()
+
+    list(REMOVE_DUPLICATES OpenCV_CUDA_LIBS_RELPATH)
+    link_directories(${OpenCV_CUDA_LIBS_RELPATH})
  endif()
 endforeach()

--- a/cmake/templates/cvconfig.h.in
+++ b/cmake/templates/cvconfig.h.in
@ -93,6 +93,10 @@

 /* Intel Integrated Performance Primitives */
 #cmakedefine HAVE_IPP
+#cmakedefine HAVE_IPP_ICV_ONLY
+
+/* Intel IPP Async */
+#cmakedefine HAVE_IPP_A

 /* JPEG-2000 codec */
 #cmakedefine HAVE_JASPER
--- a/doc/conf.py
+++ b/doc/conf.py
@ -304,11 +304,11 @@ extlinks = {
            'oldbasicstructures' : ('http://docs.opencv.org/modules/core/doc/old_basic_structures.html#%s', None),
            'readwriteimagevideo' : ('http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html#%s', None),
            'operationsonarrays' : ('http://docs.opencv.org/modules/core/doc/operations_on_arrays.html#%s', None),
-            'utilitysystemfunctions':('http://docs.opencv.org/modules/core/doc/utility_and_system_functions_and_macros.html#%s', None),
-            'imgprocfilter':('http://docs.opencv.org/modules/imgproc/doc/filtering.html#%s', None),
-            'svms':('http://docs.opencv.org/modules/ml/doc/support_vector_machines.html#%s', None),
-            'drawingfunc':('http://docs.opencv.org/modules/core/doc/drawing_functions.html#%s', None),
-            'xmlymlpers':('http://docs.opencv.org/modules/core/doc/xml_yaml_persistence.html#%s', None),
+            'utilitysystemfunctions' : ('http://docs.opencv.org/modules/core/doc/utility_and_system_functions_and_macros.html#%s', None),
+            'imgprocfilter' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html#%s', None),
+            'svms' : ('http://docs.opencv.org/modules/ml/doc/support_vector_machines.html#%s', None),
+            'drawingfunc' : ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#%s', None),
+            'xmlymlpers' : ('http://docs.opencv.org/modules/core/doc/xml_yaml_persistence.html#%s', None),
            'hgvideo' : ('http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html#%s', None),
            'gpuinit' : ('http://docs.opencv.org/modules/gpu/doc/initalization_and_information.html#%s', None),
            'gpudatastructure' : ('http://docs.opencv.org/modules/gpu/doc/data_structures.html#%s', None),
@ -316,56 +316,58 @@ extlinks = {
            'gpuperelement' : ('http://docs.opencv.org/modules/gpu/doc/per_element_operations.html#%s', None),
            'gpuimgproc' : ('http://docs.opencv.org/modules/gpu/doc/image_processing.html#%s', None),
            'gpumatrixreduct' : ('http://docs.opencv.org/modules/gpu/doc/matrix_reductions.html#%s', None),
-            'filtering':('http://docs.opencv.org/modules/imgproc/doc/filtering.html#%s', None),
+            'filtering' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html#%s', None),
            'flann' : ('http://docs.opencv.org/modules/flann/doc/flann_fast_approximate_nearest_neighbor_search.html#%s', None ),
            'calib3d' : ('http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html#%s', None ),
            'feature2d' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html#%s', None ),
            'imgproc_geometric' : ('http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html#%s', None ),
+            'miscellaneous_transformations' : ('http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html#%s', None),
+            'user_interface' : ('http://docs.opencv.org/modules/highgui/doc/user_interface.html#%s', None),

            # 'opencv_group' : ('http://answers.opencv.org/%s', None),
            'opencv_qa' : ('http://answers.opencv.org/%s', None),
            'how_to_contribute' : ('http://code.opencv.org/projects/opencv/wiki/How_to_contribute/%s', None),

-            'cvt_color': ('http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html?highlight=cvtcolor#cvtcolor%s', None),
-            'imread':    ('http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html?highlight=imread#imread%s', None),
-            'imwrite':   ('http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html?highlight=imwrite#imwrite%s', None),
-            'imshow':    ('http://docs.opencv.org/modules/highgui/doc/user_interface.html?highlight=imshow#imshow%s', None),
-            'named_window': ('http://docs.opencv.org/modules/highgui/doc/user_interface.html?highlight=namedwindow#namedwindow%s', None),
-            'wait_key': ('http://docs.opencv.org/modules/highgui/doc/user_interface.html?highlight=waitkey#waitkey%s', None),
-            'add_weighted': ('http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=addweighted#addweighted%s', None),
-            'saturate_cast': ('http://docs.opencv.org/modules/core/doc/utility_and_system_functions_and_macros.html?highlight=saturate_cast#saturate-cast%s', None),
-            'mat_zeros': ('http://docs.opencv.org/modules/core/doc/basic_structures.html?highlight=zeros#mat-zeros%s', None),
-            'convert_to': ('http://docs.opencv.org/modules/core/doc/basic_structures.html#mat-convertto%s', None),
-            'create_trackbar': ('http://docs.opencv.org/modules/highgui/doc/user_interface.html?highlight=createtrackbar#createtrackbar%s', None),
-            'point': ('http://docs.opencv.org/modules/core/doc/basic_structures.html#point%s', None),
-            'scalar': ('http://docs.opencv.org/modules/core/doc/basic_structures.html#scalar%s', None),
-            'line': ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#line%s', None),
-            'ellipse': ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#ellipse%s', None),
-            'rectangle': ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#rectangle%s', None),
-            'circle': ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#circle%s', None),
-            'fill_poly': ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#fillpoly%s', None),
-            'rng': ('http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=rng#rng%s', None),
-            'put_text': ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#puttext%s', None),
-            'gaussian_blur': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=gaussianblur#gaussianblur%s', None),
-            'blur': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=blur#blur%s', None),
-            'median_blur': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=medianblur#medianblur%s', None),
-            'bilateral_filter': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=bilateralfilter#bilateralfilter%s', None),
-            'erode': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=erode#erode%s', None),
-            'dilate': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=dilate#dilate%s', None),
-            'get_structuring_element': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=getstructuringelement#getstructuringelement%s', None),
-            'flood_fill': ( 'http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html?highlight=floodfill#floodfill%s', None),
-            'morphology_ex': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=morphologyex#morphologyex%s', None),
-            'pyr_down': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=pyrdown#pyrdown%s', None),
-            'pyr_up': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=pyrup#pyrup%s', None),
-            'resize': ('http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html?highlight=resize#resize%s', None),
-            'threshold': ('http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html?highlight=threshold#threshold%s', None),
-            'filter2d': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=filter2d#filter2d%s', None),
-            'copy_make_border': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=copymakeborder#copymakeborder%s', None),
-            'sobel': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=sobel#sobel%s', None),
-            'scharr': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=scharr#scharr%s', None),
-            'laplacian': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=laplacian#laplacian%s', None),
-            'canny': ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=canny#canny%s', None),
-            'copy_to': ('http://docs.opencv.org/modules/core/doc/basic_structures.html?highlight=copyto#mat-copyto%s', None),
+            'cvt_color' : ('http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html?highlight=cvtcolor#cvtcolor%s', None),
+            'imread' : ('http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html?highlight=imread#imread%s', None),
+            'imwrite' : ('http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html?highlight=imwrite#imwrite%s', None),
+            'imshow' : ('http://docs.opencv.org/modules/highgui/doc/user_interface.html?highlight=imshow#imshow%s', None),
+            'named_window' : ('http://docs.opencv.org/modules/highgui/doc/user_interface.html?highlight=namedwindow#namedwindow%s', None),
+            'wait_key' : ('http://docs.opencv.org/modules/highgui/doc/user_interface.html?highlight=waitkey#waitkey%s', None),
+            'add_weighted' : ('http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=addweighted#addweighted%s', None),
+            'saturate_cast' : ('http://docs.opencv.org/modules/core/doc/utility_and_system_functions_and_macros.html?highlight=saturate_cast#saturate-cast%s', None),
+            'mat_zeros' : ('http://docs.opencv.org/modules/core/doc/basic_structures.html?highlight=zeros#mat-zeros%s', None),
+            'convert_to' : ('http://docs.opencv.org/modules/core/doc/basic_structures.html#mat-convertto%s', None),
+            'create_trackbar' : ('http://docs.opencv.org/modules/highgui/doc/user_interface.html?highlight=createtrackbar#createtrackbar%s', None),
+            'point' : ('http://docs.opencv.org/modules/core/doc/basic_structures.html#point%s', None),
+            'scalar' : ('http://docs.opencv.org/modules/core/doc/basic_structures.html#scalar%s', None),
+            'line' : ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#line%s', None),
+            'ellipse' : ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#ellipse%s', None),
+            'rectangle' : ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#rectangle%s', None),
+            'circle' : ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#circle%s', None),
+            'fill_poly' : ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#fillpoly%s', None),
+            'rng' : ('http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=rng#rng%s', None),
+            'put_text' : ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#puttext%s', None),
+            'gaussian_blur' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=gaussianblur#gaussianblur%s', None),
+            'blur' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=blur#blur%s', None),
+            'median_blur' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=medianblur#medianblur%s', None),
+            'bilateral_filter' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=bilateralfilter#bilateralfilter%s', None),
+            'erode' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=erode#erode%s', None),
+            'dilate' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=dilate#dilate%s', None),
+            'get_structuring_element' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=getstructuringelement#getstructuringelement%s', None),
+            'flood_fill' : ( 'http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html?highlight=floodfill#floodfill%s', None),
+            'morphology_ex' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=morphologyex#morphologyex%s', None),
+            'pyr_down' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=pyrdown#pyrdown%s', None),
+            'pyr_up' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=pyrup#pyrup%s', None),
+            'resize' : ('http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html?highlight=resize#resize%s', None),
+            'threshold' : ('http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html?highlight=threshold#threshold%s', None),
+            'filter2d' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=filter2d#filter2d%s', None),
+            'copy_make_border' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=copymakeborder#copymakeborder%s', None),
+            'sobel' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=sobel#sobel%s', None),
+            'scharr' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=scharr#scharr%s', None),
+            'laplacian' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=laplacian#laplacian%s', None),
+            'canny' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=canny#canny%s', None),
+            'copy_to' : ('http://docs.opencv.org/modules/core/doc/basic_structures.html?highlight=copyto#mat-copyto%s', None),
            'hough_lines' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=houghlines#houghlines%s', None),
            'hough_lines_p' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=houghlinesp#houghlinesp%s', None),
            'hough_circles' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=houghcircles#houghcircles%s', None),
@ -416,5 +418,7 @@ extlinks = {
            'background_subtractor' : ('http://docs.opencv.org/modules/video/doc/motion_analysis_and_object_tracking.html?highlight=backgroundsubtractor#backgroundsubtractor%s', None),
            'background_subtractor_mog' : ('http://docs.opencv.org/modules/video/doc/motion_analysis_and_object_tracking.html?highlight=backgroundsubtractorMOG#backgroundsubtractormog%s', None),
            'background_subtractor_mog_two' : ('http://docs.opencv.org/modules/video/doc/motion_analysis_and_object_tracking.html?highlight=backgroundsubtractorMOG2#backgroundsubtractormog2%s', None),
-            'video_capture' : ('http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html?highlight=videocapture#videocapture%s', None)
+            'video_capture' : ('http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html?highlight=videocapture#videocapture%s', None),
+            'ippa_convert': ('http://docs.opencv.org/modules/core/doc/ipp_async_converters.html#%s', None),
+            'ptr':('http://docs.opencv.org/modules/core/doc/basic_structures.html?highlight=Ptr#Ptr%s', None)
           }
--- a/doc/py_tutorials/py_setup/py_intro/py_intro.rst
+++ b/doc/py_tutorials/py_setup/py_intro/py_intro.rst
@ -7,45 +7,41 @@ Introduction to OpenCV-Python Tutorials
 OpenCV
 ===============

-OpenCV was started at Intel in 1999 by **Gary Bradsky** and the first release came out in 2000. **Vadim Pisarevsky** joined Gary Bradsky to manage Intel's Russian software OpenCV team. In 2005, OpenCV was used on Stanley, the vehicle who won 2005 DARPA Grand Challenge. Later its active development continued under the support of Willow Garage, with Gary Bradsky and Vadim Pisarevsky leading the project. Right now, OpenCV supports a lot of algorithms related to Computer Vision and Machine Learning and it is expanding day-by-day.
+OpenCV was started at Intel in 1999 by **Gary Bradski**, and the first release came out in 2000. **Vadim Pisarevsky** joined Gary Bradski to manage Intel's Russian software OpenCV team. In 2005, OpenCV was used on Stanley, the vehicle that won the 2005 DARPA Grand Challenge. Later, its active development continued under the support of Willow Garage with Gary Bradski and Vadim Pisarevsky leading the project. OpenCV now supports a multitude of algorithms related to Computer Vision and Machine Learning and is expanding day by day.

-Currently OpenCV supports a wide variety of programming languages like C++, Python, Java etc and is available on different platforms including Windows, Linux, OS X, Android, iOS etc. Also, interfaces based on CUDA and OpenCL are also under active development for high-speed GPU operations.
+OpenCV supports a wide variety of programming languages such as C++, Python, Java, etc., and is available on different platforms including Windows, Linux, OS X, Android, and iOS. Interfaces for high-speed GPU operations based on CUDA and OpenCL are also under active development.

-OpenCV-Python is the Python API of OpenCV. It combines the best qualities of OpenCV C++ API and Python language.
+OpenCV-Python is the Python API for OpenCV, combining the best qualities of the OpenCV C++ API and the Python language.


 OpenCV-Python
 ===============

-Python is a general purpose programming language started by **Guido van Rossum**, which became very popular in short time mainly because of its simplicity and code readability. It enables the programmer to express his ideas in fewer lines of code without reducing any readability.
+OpenCV-Python is a library of Python bindings designed to solve computer vision problems.

-Compared to other languages like C/C++, Python is slower. But another important feature of Python is that it can be easily extended with C/C++. This feature helps us to write computationally intensive codes in C/C++ and create a Python wrapper for it so that we can use these wrappers as Python modules. This gives us two advantages: first, our code is as fast as original C/C++ code (since it is the actual C++ code working in background) and second, it is very easy to code in Python. This is how OpenCV-Python works, it is a Python wrapper around original C++ implementation.
+Python is a general purpose programming language started by **Guido van Rossum** that became very popular very quickly, mainly because of its simplicity and code readability. It enables the programmer to express ideas in fewer lines of code without reducing readability.

-And the support of Numpy makes the task more easier. **Numpy** is a highly optimized library for numerical operations. It gives a MATLAB-style syntax. All the OpenCV array structures are converted to-and-from Numpy arrays. So whatever operations you can do in Numpy, you can combine it with OpenCV, which increases number of weapons in your arsenal. Besides that, several other libraries like SciPy, Matplotlib which supports Numpy can be used with this.
+Compared to languages like C/C++, Python is slower. That said, Python can be easily extended with C/C++, which allows us to write computationally intensive code in C/C++ and create Python wrappers that can be used as Python modules. This gives us two advantages: first, the code is as fast as the original C/C++ code (since it is the actual C++ code working in the background) and second, it is easier to code in Python than in C/C++. OpenCV-Python is a Python wrapper for the original OpenCV C++ implementation.

-So OpenCV-Python is an appropriate tool for fast prototyping of computer vision problems.
+OpenCV-Python makes use of **Numpy**, which is a highly optimized library for numerical operations with a MATLAB-style syntax. All the OpenCV array structures are converted to and from Numpy arrays. This also makes it easier to integrate with other libraries that use Numpy such as SciPy and Matplotlib.


 OpenCV-Python Tutorials
 =============================

-OpenCV introduces a new set of tutorials which will guide you through various functions available in OpenCV-Python. **This guide is mainly focused on OpenCV 3.x version** (although most of the tutorials will work with OpenCV 2.x also).
+OpenCV introduces a new set of tutorials which will guide you through various functions available in OpenCV-Python. **This guide is mainly focused on OpenCV 3.x version** (although most of the tutorials will also work with OpenCV 2.x).

-A prior knowledge on Python and Numpy is required before starting because they won't be covered in this guide. **Especially, a good knowledge on Numpy is must to write optimized codes in OpenCV-Python.**
+Prior knowledge of Python and Numpy is recommended as they won't be covered in this guide. **Proficiency with Numpy is a must in order to write optimized code using OpenCV-Python.**

-This tutorial has been started by *Abid Rahman K.* as part of Google Summer of Code 2013 program, under the guidance of *Alexander Mordvintsev*.
+This tutorial was originally started by *Abid Rahman K.* as part of the Google Summer of Code 2013 program under the guidance of *Alexander Mordvintsev*.


 OpenCV Needs You !!!
 ==========================

-Since OpenCV is an open source initiative, all are welcome to make contributions to this library. And it is same for this tutorial also.
+Since OpenCV is an open source initiative, all are welcome to make contributions to the library, documentation, and tutorials. If you find any mistake in this tutorial (from a small spelling mistake to an egregious error in code or concept), feel free to correct it by cloning OpenCV in `GitHub <https://github.com/Itseez/opencv>`_ and submitting a pull request. OpenCV developers will check your pull request, give you important feedback and (once it passes the approval of the reviewer) it will be merged into OpenCV. You will then become an open source contributor :-)

-So, if you find any mistake in this tutorial (whether it be a small spelling mistake or a big error in code or concepts, whatever), feel free to correct it.
-
-And that will be a good task for freshers who begin to contribute to open source projects. Just fork the OpenCV in github, make necessary corrections and send a pull request to OpenCV. OpenCV developers will check your pull request, give you important feedback and once it passes the approval of the reviewer, it will be merged to OpenCV. Then you become a open source contributor. Similar is the case with other tutorials, documentation etc.
-
-As new modules are added to OpenCV-Python, this tutorial will have to be expanded. So those who knows about particular algorithm can write up a tutorial which includes a basic theory of the algorithm and a code showing basic usage of the algorithm and submit it to OpenCV.
+As new modules are added to OpenCV-Python, this tutorial will have to be expanded. If you are familiar with a particular algorithm and can write up a tutorial including basic theory of the algorithm and code showing example usage, please do so.

 Remember, we **together** can make this project a great success !!!

--- a/doc/tutorials/core/how_to_use_ippa_conversion/how_to_use_ippa_conversion.rst
+++ b/doc/tutorials/core/how_to_use_ippa_conversion/how_to_use_ippa_conversion.rst
@ -0,0 +1,164 @@
+.. _howToUseIPPAconversion:
+
+Intel® IPP Asynchronous C/C++ library in OpenCV
+***********************************************
+
+Goal
+====
+
+.. _hppiSobel: http://software.intel.com/en-us/node/474701
+.. _hppiMatrix: http://software.intel.com/en-us/node/501660
+
+The tutorial demonstrates the `Intel® IPP Asynchronous C/C++ <http://software.intel.com/en-us/intel-ipp-preview>`_ library usage with OpenCV.
+The code example below illustrates implementation of the Sobel operation, accelerated with Intel® IPP Asynchronous C/C++ functions.
+In this code example, :ippa_convert:`hpp::getMat <>` and :ippa_convert:`hpp::getHpp <>` functions are used for data conversion between hppiMatrix_ and ``Mat`` matrices.
+
+Code
+====
+
+You may also find the source code in the :file:`samples/cpp/tutorial_code/core/ippasync/ippasync_sample.cpp`
+file of the OpenCV source library or :download:`download it from here
+<../../../../samples/cpp/tutorial_code/core/ippasync/ippasync_sample.cpp>`.
+
+.. literalinclude:: ../../../../samples/cpp/tutorial_code/core/ippasync/ippasync_sample.cpp
+   :language: cpp
+   :linenos:
+   :tab-width: 4
+
+Explanation
+===========
+
+#. Create parameters for OpenCV:
+
+   .. code-block:: cpp
+
+      VideoCapture cap;
+      Mat image, gray, result;
+
+   and IPP Async:
+
+   .. code-block:: cpp
+
+      hppiMatrix* src,* dst;
+      hppAccel accel = 0;
+      hppAccelType accelType;
+      hppStatus sts;
+      hppiVirtualMatrix * virtMatrix;
+
+#. Load the input image or video. See the :ref:`videoInputPSNRMSSIM` tutorial for how to open and read a video stream.
+
+   .. code-block:: cpp
+
+      if( useCamera )
+      {
+         printf("used camera\n");
+         cap.open(0);
+      }
+      else
+      {
+         printf("used image %s\n", file.c_str());
+         cap.open(file.c_str());
+      }
+
+      if( !cap.isOpened() )
+      {
+         printf("can not open camera or video file\n");
+         return -1;
+      }
+
+#. Create accelerator instance using `hppCreateInstance <http://software.intel.com/en-us/node/501686>`_:
+
+   .. code-block:: cpp
+
+      accelType = sAccel == "cpu" ? HPP_ACCEL_TYPE_CPU:
+                  sAccel == "gpu" ? HPP_ACCEL_TYPE_GPU:
+                                    HPP_ACCEL_TYPE_ANY;
+
+      //Create accelerator instance
+      sts = hppCreateInstance(accelType, 0, &accel);
+      CHECK_STATUS(sts, "hppCreateInstance");
+
+#. Create an array of virtual matrices using `hppiCreateVirtualMatrices <http://software.intel.com/en-us/node/501700>`_ function.
+
+   .. code-block:: cpp
+
+      virtMatrix = hppiCreateVirtualMatrices(accel, 1);
+
+#. Prepare a matrix for input and output data:
+
+   .. code-block:: cpp
+
+      cap >> image;
+      if(image.empty())
+         break;
+
+      cvtColor( image, gray, COLOR_BGR2GRAY );
+
+      result.create( image.rows, image.cols, CV_8U);
+
+#. Convert ``Mat`` to hppiMatrix_ using :ippa_convert:`getHpp <>` and call hppiSobel_ function.
+
+   .. code-block:: cpp
+
+      //convert Mat to hppiMatrix
+      src = getHpp(gray, accel);
+      dst = getHpp(result, accel);
+
+      sts = hppiSobel(accel,src, HPP_MASK_SIZE_3X3,HPP_NORM_L1,virtMatrix[0]);
+      CHECK_STATUS(sts,"hppiSobel");
+
+      sts = hppiConvert(accel, virtMatrix[0], 0, HPP_RND_MODE_NEAR, dst, HPP_DATA_TYPE_8U);
+      CHECK_STATUS(sts,"hppiConvert");
+
+      // Wait for tasks to complete
+      sts = hppWait(accel, HPP_TIME_OUT_INFINITE);
+      CHECK_STATUS(sts, "hppWait");
+
+   We use `hppiConvert <http://software.intel.com/en-us/node/501746>`_ because hppiSobel_ returns a destination
+   matrix with the ``HPP_DATA_TYPE_16S`` data type for a source matrix with the ``HPP_DATA_TYPE_8U`` type.
+   You should check ``hppStatus`` after each call to an IPP Async function.
+
+#. Create windows and show the images, the usual way.
+
+   .. code-block:: cpp
+
+      imshow("image", image);
+      imshow("rez", result);
+
+      waitKey(15);
+
+#. Delete hpp matrices.
+
+   .. code-block:: cpp
+
+      sts =  hppiFreeMatrix(src);
+      CHECK_DEL_STATUS(sts,"hppiFreeMatrix");
+
+      sts =  hppiFreeMatrix(dst);
+      CHECK_DEL_STATUS(sts,"hppiFreeMatrix");
+
+#. Delete virtual matrices and accelerator instance.
+
+   .. code-block:: cpp
+
+      if (virtMatrix)
+      {
+         sts = hppiDeleteVirtualMatrices(accel, virtMatrix);
+         CHECK_DEL_STATUS(sts,"hppiDeleteVirtualMatrices");
+      }
+
+      if (accel)
+      {
+         sts = hppDeleteInstance(accel);
+         CHECK_DEL_STATUS(sts, "hppDeleteInstance");
+      }
+
+Result
+=======
+
+After compiling the code above, we can execute it, giving an image or video path and an accelerator type as arguments.
+For this tutorial we use the baboon.png image as input. The result is below.
+
+  .. image:: images/How_To_Use_IPPA_Result.jpg
+    :alt: Final Result
+    :align: center
--- a/doc/tutorials/core/how_to_use_ippa_conversion/images/How_To_Use_IPPA_Result.jpg
+++ b/doc/tutorials/core/how_to_use_ippa_conversion/images/How_To_Use_IPPA_Result.jpg
--- a/doc/tutorials/core/mat-mask-operations/mat-mask-operations.rst
+++ b/doc/tutorials/core/mat-mask-operations/mat-mask-operations.rst
@ -32,14 +32,14 @@ Here's a function that will do this:

 .. code-block:: cpp

-   void Sharpen(const Mat& myImage,Mat& Result)
+   void Sharpen(const Mat& myImage, Mat& Result)
   {
       CV_Assert(myImage.depth() == CV_8U);  // accept only uchar images

-       Result.create(myImage.size(),myImage.type());
+       Result.create(myImage.size(), myImage.type());
       const int nChannels = myImage.channels();

-       for(int j = 1 ; j < myImage.rows-1; ++j)
+       for(int j = 1; j < myImage.rows - 1; ++j)
       {
           const uchar* previous = myImage.ptr<uchar>(j - 1);
           const uchar* current  = myImage.ptr<uchar>(j    );
@ -47,17 +47,17 @@ Here's a function that will do this:

           uchar* output = Result.ptr<uchar>(j);

-           for(int i= nChannels;i < nChannels*(myImage.cols-1); ++i)
+           for(int i = nChannels; i < nChannels * (myImage.cols - 1); ++i)
           {
-               *output++ = saturate_cast<uchar>(5*current[i]
-                            -current[i-nChannels] - current[i+nChannels] - previous[i] - next[i]);
+               *output++ = saturate_cast<uchar>(5 * current[i]
+                            -current[i - nChannels] - current[i + nChannels] - previous[i] - next[i]);
           }
       }

       Result.row(0).setTo(Scalar(0));
-       Result.row(Result.rows-1).setTo(Scalar(0));
+       Result.row(Result.rows - 1).setTo(Scalar(0));
       Result.col(0).setTo(Scalar(0));
-       Result.col(Result.cols-1).setTo(Scalar(0));
+       Result.col(Result.cols - 1).setTo(Scalar(0));
   }

 At first we make sure that the input images data is in unsigned char format. For this we use the :utilitysystemfunctions:`CV_Assert <cv-assert>` function that throws an error when the expression inside it is false.
@ -70,14 +70,14 @@ We create an output image with the same size and the same type as our input. As

 .. code-block:: cpp

-   Result.create(myImage.size(),myImage.type());
+   Result.create(myImage.size(), myImage.type());
   const int nChannels = myImage.channels();

 We'll use the plain C [] operator to access pixels. Because we need to access multiple rows at the same time we'll acquire the pointers for each of them (a previous, a current and a next line). We need another pointer to where we're going to save the calculation. Then simply access the right items with the [] operator. For moving the output pointer ahead we simply increase this (with one byte) after each operation:

 .. code-block:: cpp

-   for(int j = 1 ; j < myImage.rows-1; ++j)
+   for(int j = 1; j < myImage.rows - 1; ++j)
   {
       const uchar* previous = myImage.ptr<uchar>(j - 1);
       const uchar* current  = myImage.ptr<uchar>(j    );
@ -85,21 +85,21 @@ We'll use the plain C [] operator to access pixels. Because we need to access mu

       uchar* output = Result.ptr<uchar>(j);

-       for(int i= nChannels;i < nChannels*(myImage.cols-1); ++i)
+       for(int i = nChannels; i < nChannels * (myImage.cols - 1); ++i)
       {
-           *output++ = saturate_cast<uchar>(5*current[i]
-                        -current[i-nChannels] - current[i+nChannels] - previous[i] - next[i]);
+           *output++ = saturate_cast<uchar>(5 * current[i]
+                        -current[i - nChannels] - current[i + nChannels] - previous[i] - next[i]);
       }
   }

-On the borders of the image the upper notation results inexistent pixel locations (like minus one - minus one). In these points our formula is undefined. A simple solution is to not apply the mask in these points and, for example, set the pixels on the borders to zeros:
+On the borders of the image the upper notation results in nonexistent pixel locations (like minus one - minus one). In these points our formula is undefined. A simple solution is to not apply the kernel in these points and, for example, set the pixels on the borders to zeros:

 .. code-block:: cpp

-   Result.row(0).setTo(Scalar(0));             // The top row
-   Result.row(Result.rows-1).setTo(Scalar(0)); // The bottom row
-   Result.col(0).setTo(Scalar(0));             // The left column
-   Result.col(Result.cols-1).setTo(Scalar(0)); // The right column
+   Result.row(0).setTo(Scalar(0));               // The top row
+   Result.row(Result.rows - 1).setTo(Scalar(0)); // The bottom row
+   Result.col(0).setTo(Scalar(0));               // The left column
+   Result.col(Result.cols - 1).setTo(Scalar(0)); // The right column

 The filter2D function
 =====================
@ -116,7 +116,7 @@ Then call the :filtering:`filter2D <filter2d>` function specifying the input, th

 .. code-block:: cpp

-   filter2D(I, K, I.depth(), kern );
+   filter2D(I, K, I.depth(), kern);

 The function even has a fifth optional argument to specify the center of the kernel, and a sixth one for determining what to do in the regions where the operation is undefined (borders). Using this function has the advantage that it's shorter, less verbose and because there are some optimization techniques implemented it is usually faster than the *hand-coded method*. For example in my test while the second one took only 13 milliseconds the first took around 31 milliseconds. Quite some difference.

--- a/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.rst
+++ b/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.rst
@ -45,7 +45,7 @@ All the above objects, in the end, point to the same single data matrix. Their h
   :linenos:

   Mat D (A, Rect(10, 10, 100, 100) ); // using a rectangle
-   Mat E = A(Range:all(), Range(1,3)); // using row and column boundaries
+   Mat E = A(Range::all(), Range(1,3)); // using row and column boundaries

 Now you may ask if the matrix itself may belong to multiple *Mat* objects who takes responsibility for cleaning it up when it's no longer needed. The short answer is: the last object that used it. This is handled by using a reference counting mechanism. Whenever somebody copies a header of a *Mat* object, a counter is increased for the matrix. Whenever a header is cleaned this counter is decreased. When the counter reaches zero the matrix too is freed. Sometimes you will want to copy the matrix itself too, so OpenCV provides the :basicstructures:`clone() <mat-clone>` and :basicstructures:`copyTo() <mat-copyto>` functions.

@ -86,7 +86,7 @@ Each of the building components has their own valid domains. This leads to the d
 Creating a *Mat* object explicitly
 ==================================

-In the :ref:`Load_Save_Image` tutorial you have already learned how to write a matrix to an image file by using the :readWriteImageVideo:` imwrite() <imwrite>` function. However, for debugging purposes it's much more convenient to see the actual values. You can do this using the << operator of *Mat*. Be aware that this only works for two dimensional matrices.
+In the :ref:`Load_Save_Image` tutorial you have already learned how to write a matrix to an image file by using the :readwriteimagevideo:`imwrite() <imwrite>` function. However, for debugging purposes it's much more convenient to see the actual values. You can do this using the << operator of *Mat*. Be aware that this only works for two dimensional matrices.

 Although *Mat* works really well as an image container, it is also a general matrix class. Therefore, it is possible to create and manipulate multidimensional matrices. You can create a Mat object in multiple ways:

--- a/doc/tutorials/core/table_of_content_core/images/How_To_Use_IPPA.jpg
+++ b/doc/tutorials/core/table_of_content_core/images/How_To_Use_IPPA.jpg
--- a/doc/tutorials/core/table_of_content_core/table_of_content_core.rst
+++ b/doc/tutorials/core/table_of_content_core/table_of_content_core.rst
@ -200,7 +200,28 @@ Here you will learn the about the basic building blocks of the library. A must r
                   :height: 90pt
                   :width:  90pt

+  =============== ======================================================
+
+  .. tabularcolumns:: m{100pt} m{300pt}
+  .. cssclass:: toctableopencv
+
+  =============== ======================================================
+   |IPPIma|       **Title:** :ref:`howToUseIPPAconversion`
+
+                  *Compatibility:* > OpenCV 2.0

+                  *Author:* |Author_ElenaG|
+
+                  You will see how to use the IPP Async with OpenCV.
+
+  =============== ======================================================
+
+  .. |IPPIma| image:: images/How_To_Use_IPPA.jpg
+                   :height: 90pt
+                   :width:  90pt
+  .. |Author_ElenaG| unicode:: Elena U+0020 Gvozdeva
+
+  =============== ======================================================

 .. raw:: latex

@ -219,3 +240,4 @@ Here you will learn the about the basic building blocks of the library. A must r
   ../discrete_fourier_transform/discrete_fourier_transform
   ../file_input_output_with_xml_yml/file_input_output_with_xml_yml
   ../interoperability_with_OpenCV_1/interoperability_with_OpenCV_1
+   ../how_to_use_ippa_conversion/how_to_use_ippa_conversion
--- a/doc/tutorials/introduction/android_binary_package/O4A_SDK.rst
+++ b/doc/tutorials/introduction/android_binary_package/O4A_SDK.rst
@ -48,10 +48,10 @@ The structure of package contents looks as follows:

 ::

-    OpenCV-2.4.8-android-sdk
+    OpenCV-2.4.9-android-sdk
    |_ apk
-    |   |_ OpenCV_2.4.8_binary_pack_armv7a.apk
-    |   |_ OpenCV_2.4.8_Manager_2.16_XXX.apk
+    |   |_ OpenCV_2.4.9_binary_pack_armv7a.apk
+    |   |_ OpenCV_2.4.9_Manager_2.18_XXX.apk
    |
    |_ doc
    |_ samples
@ -157,10 +157,10 @@ Get the OpenCV4Android SDK

   .. code-block:: bash

-      unzip ~/Downloads/OpenCV-2.4.8-android-sdk.zip
+      unzip ~/Downloads/OpenCV-2.4.9-android-sdk.zip

-.. |opencv_android_bin_pack| replace:: :file:`OpenCV-2.4.8-android-sdk.zip`
-.. _opencv_android_bin_pack_url: http://sourceforge.net/projects/opencvlibrary/files/opencv-android/2.4.8/OpenCV-2.4.8-android-sdk.zip/download
+.. |opencv_android_bin_pack| replace:: :file:`OpenCV-2.4.9-android-sdk.zip`
+.. _opencv_android_bin_pack_url: http://sourceforge.net/projects/opencvlibrary/files/opencv-android/2.4.9/OpenCV-2.4.9-android-sdk.zip/download
 .. |opencv_android_bin_pack_url| replace:: |opencv_android_bin_pack|
 .. |seven_zip| replace:: 7-Zip
 .. _seven_zip: http://www.7-zip.org/
@ -295,7 +295,7 @@ Well, running samples from Eclipse is very simple:
  .. code-block:: sh
    :linenos:

-    <Android SDK path>/platform-tools/adb install <OpenCV4Android SDK path>/apk/OpenCV_2.4.8_Manager_2.16_armv7a-neon.apk
+    <Android SDK path>/platform-tools/adb install <OpenCV4Android SDK path>/apk/OpenCV_2.4.9_Manager_2.18_armv7a-neon.apk

  .. note:: ``armeabi``, ``armv7a-neon``, ``arm7a-neon-android8``, ``mips`` and ``x86`` stand for
            platform targets:
--- a/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.rst
+++ b/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.rst
@ -55,14 +55,14 @@ Manager to access OpenCV libraries externally installed in the target system.
   :guilabel:`File -> Import -> Existing project in your workspace`.

   Press :guilabel:`Browse`  button and locate OpenCV4Android SDK
-   (:file:`OpenCV-2.4.8-android-sdk/sdk`).
+   (:file:`OpenCV-2.4.9-android-sdk/sdk`).

   .. image:: images/eclipse_opencv_dependency0.png
        :alt: Add dependency from OpenCV library
        :align: center

 #. In application project add a reference to the OpenCV Java SDK in
-   :guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.8``.
+   :guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.9``.

   .. image:: images/eclipse_opencv_dependency1.png
        :alt: Add dependency from OpenCV library
@ -128,27 +128,27 @@ described above.
 #. Add the OpenCV library project to your workspace the same way as for the async initialization
   above. Use menu :guilabel:`File -> Import -> Existing project in your workspace`,
   press :guilabel:`Browse` button and select OpenCV SDK path
-   (:file:`OpenCV-2.4.8-android-sdk/sdk`).
+   (:file:`OpenCV-2.4.9-android-sdk/sdk`).

   .. image:: images/eclipse_opencv_dependency0.png
        :alt: Add dependency from OpenCV library
        :align: center

 #. In the application project add a reference to the OpenCV4Android SDK in
-   :guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.8``;
+   :guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.9``;

   .. image:: images/eclipse_opencv_dependency1.png
       :alt: Add dependency from OpenCV library
       :align: center

 #. If your application project **doesn't have a JNI part**, just copy the corresponding OpenCV
-   native libs from :file:`<OpenCV-2.4.8-android-sdk>/sdk/native/libs/<target_arch>` to your
+   native libs from :file:`<OpenCV-2.4.9-android-sdk>/sdk/native/libs/<target_arch>` to your
   project directory to folder :file:`libs/<target_arch>`.

   In case of the application project **with a JNI part**, instead of manual libraries copying you
   need to modify your ``Android.mk`` file:
   add the following two code lines after the ``"include $(CLEAR_VARS)"`` and before
-   ``"include path_to_OpenCV-2.4.8-android-sdk/sdk/native/jni/OpenCV.mk"``
+   ``"include path_to_OpenCV-2.4.9-android-sdk/sdk/native/jni/OpenCV.mk"``

   .. code-block:: make
      :linenos:
@ -221,7 +221,7 @@ taken:

   .. code-block:: make

-      include C:\Work\OpenCV4Android\OpenCV-2.4.8-android-sdk\sdk\native\jni\OpenCV.mk
+      include C:\Work\OpenCV4Android\OpenCV-2.4.9-android-sdk\sdk\native\jni\OpenCV.mk

   Should be inserted into the :file:`jni/Android.mk` file **after** this line:

--- a/doc/tutorials/introduction/load_save_image/load_save_image.rst
+++ b/doc/tutorials/introduction/load_save_image/load_save_image.rst
@ -5,7 +5,7 @@ Load, Modify, and Save an Image

 .. note::

-   We assume that by now you know how to load an image using :imread:`imread <>` and to display it in a window (using :imshow:`imshow <>`). Read the :ref:`Display_Image` tutorial otherwise.
+   We assume that by now you know how to load an image using :readwriteimagevideo:`imread <imread>` and to display it in a window (using :user_interface:`imshow <imshow>`). Read the :ref:`Display_Image` tutorial otherwise.

 Goals
 ======
@ -14,9 +14,9 @@ In this tutorial you will learn how to:

 .. container:: enumeratevisibleitemswithsquare

-   * Load an image using :imread:`imread <>`
-   * Transform an image from BGR to Grayscale format by using :cvt_color:`cvtColor <>`
-   * Save your transformed image in a file on disk (using :imwrite:`imwrite <>`)
+   * Load an image using :readwriteimagevideo:`imread <imread>`
+   * Transform an image from BGR to Grayscale format by using :miscellaneous_transformations:`cvtColor <cvtcolor>`
+   * Save your transformed image in a file on disk (using :readwriteimagevideo:`imwrite <imwrite>`)

 Code
 ======
@ -62,10 +62,7 @@ Here it is:
 Explanation
 ============

-#. We begin by:
-
-   * Creating a Mat object to store the image information
-   * Load an image using :imread:`imread <>`, located in the path given by *imageName*. Fort this example, assume you are loading a RGB image.
+#. We begin by using :readwriteimagevideo:`imread <imread>` to load the image located at the path given by *imageName*. For this example, assume you are loading an RGB image.

 #. Now we are going to convert our image from BGR to Grayscale format. OpenCV has a really nice function to do this kind of transformations:

@ -73,15 +70,15 @@ Explanation

      cvtColor( image, gray_image, CV_BGR2GRAY );

-   As you can see, :cvt_color:`cvtColor <>` takes as arguments:
+   As you can see, :miscellaneous_transformations:`cvtColor <cvtcolor>` takes as arguments:

   .. container:: enumeratevisibleitemswithsquare

      * a source image (*image*)
      * a destination image (*gray_image*), in which we will save the converted image.
-      * an additional parameter that indicates what kind of transformation will be performed. In this case we use **CV_BGR2GRAY** (because of :imread:`imread <>` has BGR default channel order in case of color images).
+      * an additional parameter that indicates what kind of transformation will be performed. In this case we use **CV_BGR2GRAY** (because :readwriteimagevideo:`imread <imread>` uses BGR as the default channel order for color images).

-#. So now we have our new *gray_image* and want to save it on disk (otherwise it will get lost after the program ends). To save it, we will use a function analagous to :imread:`imread <>`: :imwrite:`imwrite <>`
+#. So now we have our new *gray_image* and want to save it on disk (otherwise it will get lost after the program ends). To save it, we will use a function analogous to :readwriteimagevideo:`imread <imread>`: :readwriteimagevideo:`imwrite <imwrite>`

   .. code-block:: cpp

--- a/doc/tutorials/introduction/windows_install/windows_install.rst
+++ b/doc/tutorials/introduction/windows_install/windows_install.rst
@ -62,6 +62,8 @@ Building the OpenCV library from scratch requires a couple of tools installed be
 .. _IntelTBB: http://threadingbuildingblocks.org/file.php?fid=77
 .. |IntelIIP| replace:: Intel |copy| Integrated Performance Primitives (*IPP*)
 .. _IntelIIP: http://software.intel.com/en-us/articles/intel-ipp/
+.. |IntelIIPA| replace:: Intel |copy| IPP Asynchronous C/C++
+.. _IntelIIPA: http://software.intel.com/en-us/intel-ipp-preview
 .. |qtframework| replace:: Qt framework
 .. _qtframework: http://qt.nokia.com/downloads
 .. |Eigen| replace:: Eigen
@ -97,6 +99,8 @@ OpenCV may come in multiple flavors. There is a "core" section that will work on

   + |IntelIIP|_ may be used to improve the performance of color conversion, Haar training and DFT functions of the OpenCV library. Watch out, since this isn't a free service.

+   + |IntelIIPA|_ is currently focused on delivering Intel |copy| Graphics support for advanced image processing and computer vision functions.
+
   + OpenCV offers a somewhat fancier and more useful graphical user interface, than the default one by using the |qtframework|_. For a quick overview of what this has to offer look into the documentations *highgui* module, under the *Qt New Functions* section. Version 4.6 or later of the framework is required.

   + |Eigen|_ is a C++ template library for linear algebra.
@ -168,6 +172,8 @@ Building the library
         :alt: The Miktex Install Screen
         :align: center

+   #) For the |IntelIIPA|_ download the source files and set the environment variable **IPP_ASYNC_ROOT**. It should point to the :file:`<your Program Files(x86) directory>/Intel/IPP Preview */ipp` directory. Here ``*`` denotes the particular preview name.
+
   #) In case of the |Eigen|_ library it is again a case of download and extract to the :file:`D:/OpenCV/dep` directory.

   #) Same as above with |OpenEXR|_.
--- a/doc/tutorials/tutorials.rst
+++ b/doc/tutorials/tutorials.rst
@ -102,7 +102,7 @@ As always, we would be happy to hear your comments and receive your contribution
   .. cssclass:: toctableopencv

   =========== =======================================================
-   |Video|     Look here in order to find use on your video stream algoritms like: motion extraction, feature tracking and foreground extractions.
+   |Video|     Look here to find algorithms you can use on your video stream, such as motion extraction, feature tracking and foreground extraction.

   =========== =======================================================

--- a/doc/tutorials/video/table_of_content_video/table_of_content_video.rst
+++ b/doc/tutorials/video/table_of_content_video/table_of_content_video.rst
@ -3,7 +3,7 @@
 *video* module. Video analysis
 -----------------------------------------------------------

-Look here in order to find use on your video stream algoritms like: motion extraction, feature tracking and foreground extractions.
+Look here to find algorithms you can use on your video stream, such as motion extraction, feature tracking and foreground extraction.

 .. include:: ../../definitions/tocDefinitions.rst

--- a/modules/calib3d/test/test_affine3.cpp
+++ b/modules/calib3d/test/test_affine3.cpp
@ -54,8 +54,8 @@ TEST(Calib3d_Affine3f, accuracy)
    cv::Rodrigues(rvec, expected);


-    ASSERT_EQ(0, norm(cv::Mat(affine.matrix, false).colRange(0, 3).rowRange(0, 3) != expected));
-    ASSERT_EQ(0, norm(cv::Mat(affine.linear()) != expected));
+    ASSERT_EQ(0, cvtest::norm(cv::Mat(affine.matrix, false).colRange(0, 3).rowRange(0, 3) != expected, cv::NORM_L2));
+    ASSERT_EQ(0, cvtest::norm(cv::Mat(affine.linear()) != expected, cv::NORM_L2));


    cv::Matx33d R = cv::Matx33d::eye();
@ -77,7 +77,7 @@ TEST(Calib3d_Affine3f, accuracy)
    cv::Mat diff;
    cv::absdiff(expected, result.matrix, diff);

-    ASSERT_LT(cv::norm(diff, cv::NORM_INF), 1e-15);
+    ASSERT_LT(cvtest::norm(diff, cv::NORM_INF), 1e-15);
 }

 TEST(Calib3d_Affine3f, accuracy_rvec)
@ -103,6 +103,6 @@ TEST(Calib3d_Affine3f, accuracy_rvec)
        cv::Rodrigues(R, vo);
        //std::cout << "O:" <<(cv::getTickCount() - s)*1000/cv::getTickFrequency() << std::endl;

-        ASSERT_LT(cv::norm(va - vo), 1e-9);
+        ASSERT_LT(cvtest::norm(va, vo, cv::NORM_L2), 1e-9);
    }
 }
--- a/modules/calib3d/test/test_affine3d_estimator.cpp
+++ b/modules/calib3d/test/test_affine3d_estimator.cpp
@ -108,9 +108,9 @@ bool CV_Affine3D_EstTest::test4Points()
    estimateAffine3D(fpts, tpts, aff_est, outliers);

    const double thres = 1e-3;
-    if (norm(aff_est, aff, NORM_INF) > thres)
+    if (cvtest::norm(aff_est, aff, NORM_INF) > thres)
    {
-        //cout << norm(aff_est, aff, NORM_INF) << endl;
+        //cout << cvtest::norm(aff_est, aff, NORM_INF) << endl;
        ts->set_failed_test_info(cvtest::TS::FAIL_MISMATCH);
        return false;
    }
@ -161,7 +161,7 @@ bool CV_Affine3D_EstTest::testNPoints()
    }

    const double thres = 1e-4;
-    if (norm(aff_est, aff, NORM_INF) > thres)
+    if (cvtest::norm(aff_est, aff, NORM_INF) > thres)
    {
        cout << "aff est: " << aff_est << endl;
        cout << "aff ref: " << aff << endl;
--- a/modules/calib3d/test/test_cameracalibration.cpp
+++ b/modules/calib3d/test/test_cameracalibration.cpp
@ -215,7 +215,7 @@ void CV_ProjectPointsTest::prepare_to_validation( int /*test_case_idx*/ )
    cvTsProjectPoints( m, vec2, m2v_jac );
    cvTsCopy( vec, vec2 );

-    theta0 = cvNorm( vec2, 0, CV_L2 );
+    theta0 = cvtest::norm( cvarrtomat(vec2), 0, CV_L2 );
    theta1 = fmod( theta0, CV_PI*2 );

    if( theta1 > CV_PI )
@ -225,7 +225,7 @@ void CV_ProjectPointsTest::prepare_to_validation( int /*test_case_idx*/ )
    if( calc_jacobians )
    {
        //cvInvert( v2m_jac, m2v_jac, CV_SVD );
-        if( cvNorm(&test_mat[OUTPUT][3],0,CV_C) < 1000 )
+        if( cvtest::norm(cvarrtomat(&test_mat[OUTPUT][3]), 0, CV_C) < 1000 )
        {
            cvTsGEMM( &test_mat[OUTPUT][1], &test_mat[OUTPUT][3],
                      1, 0, 0, &test_mat[OUTPUT][4],
@ -1112,7 +1112,7 @@ void CV_ProjectPointsTest::run(int)
            rightImgPoints[i], valDpdrot, valDpdt, valDpdf, valDpdc, valDpddist, 0 );
    }
    calcdfdx( leftImgPoints, rightImgPoints, dEps, valDpdrot );
-    err = norm( dpdrot, valDpdrot, NORM_INF );
+    err = cvtest::norm( dpdrot, valDpdrot, NORM_INF );
    if( err > 3 )
    {
        ts->printf( cvtest::TS::LOG, "bad dpdrot: too big difference = %g\n", err );
@ -1130,7 +1130,7 @@ void CV_ProjectPointsTest::run(int)
            rightImgPoints[i], valDpdrot, valDpdt, valDpdf, valDpdc, valDpddist, 0 );
    }
    calcdfdx( leftImgPoints, rightImgPoints, dEps, valDpdt );
-    if( norm( dpdt, valDpdt, NORM_INF ) > 0.2 )
+    if( cvtest::norm( dpdt, valDpdt, NORM_INF ) > 0.2 )
    {
        ts->printf( cvtest::TS::LOG, "bad dpdtvec\n" );
        code = cvtest::TS::FAIL_BAD_ACCURACY;
@ -1153,7 +1153,7 @@ void CV_ProjectPointsTest::run(int)
    project( objPoints, rvec, tvec, rightCameraMatrix, distCoeffs,
        rightImgPoints[1], valDpdrot, valDpdt, valDpdf, valDpdc, valDpddist, 0 );
    calcdfdx( leftImgPoints, rightImgPoints, dEps, valDpdf );
-    if ( norm( dpdf, valDpdf ) > 0.2 )
+    if ( cvtest::norm( dpdf, valDpdf, NORM_L2 ) > 0.2 )
    {
        ts->printf( cvtest::TS::LOG, "bad dpdf\n" );
        code = cvtest::TS::FAIL_BAD_ACCURACY;
@ -1174,7 +1174,7 @@ void CV_ProjectPointsTest::run(int)
    project( objPoints, rvec, tvec, rightCameraMatrix, distCoeffs,
        rightImgPoints[1], valDpdrot, valDpdt, valDpdf, valDpdc, valDpddist, 0 );
    calcdfdx( leftImgPoints, rightImgPoints, dEps, valDpdc );
-    if ( norm( dpdc, valDpdc ) > 0.2 )
+    if ( cvtest::norm( dpdc, valDpdc, NORM_L2 ) > 0.2 )
    {
        ts->printf( cvtest::TS::LOG, "bad dpdc\n" );
        code = cvtest::TS::FAIL_BAD_ACCURACY;
@ -1193,7 +1193,7 @@ void CV_ProjectPointsTest::run(int)
            rightImgPoints[i], valDpdrot, valDpdt, valDpdf, valDpdc, valDpddist, 0 );
    }
    calcdfdx( leftImgPoints, rightImgPoints, dEps, valDpddist );
-    if( norm( dpddist, valDpddist ) > 0.3 )
+    if( cvtest::norm( dpddist, valDpddist, NORM_L2 ) > 0.3 )
    {
        ts->printf( cvtest::TS::LOG, "bad dpddist\n" );
        code = cvtest::TS::FAIL_BAD_ACCURACY;
@ -1481,8 +1481,8 @@ void CV_StereoCalibrationTest::run( int )
        Mat eye33 = Mat::eye(3,3,CV_64F);
        Mat R1t = R1.t(), R2t = R2.t();

-        if( norm(R1t*R1 - eye33) > 0.01 ||
-            norm(R2t*R2 - eye33) > 0.01 ||
+        if( cvtest::norm(R1t*R1 - eye33, NORM_L2) > 0.01 ||
+            cvtest::norm(R2t*R2 - eye33, NORM_L2) > 0.01 ||
            abs(determinant(F)) > 0.01)
        {
            ts->printf( cvtest::TS::LOG, "The computed (by rectify) R1 and R2 are not orthogonal,"
@ -1505,7 +1505,7 @@ void CV_StereoCalibrationTest::run( int )

        //check that Tx after rectification is equal to distance between cameras
        double tx = fabs(P2.at<double>(0, 3) / P2.at<double>(0, 0));
-        if (fabs(tx - norm(T)) > 1e-5)
+        if (fabs(tx - cvtest::norm(T, NORM_L2)) > 1e-5)
        {
            ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
            return;
@ -1556,7 +1556,7 @@ void CV_StereoCalibrationTest::run( int )
        Mat reprojectedPoints;
        perspectiveTransform(sparsePoints, reprojectedPoints, Q);

-        if (norm(triangulatedPoints - reprojectedPoints) / sqrt((double)pointsCount) > requiredAccuracy)
+        if (cvtest::norm(triangulatedPoints, reprojectedPoints, NORM_L2) / sqrt((double)pointsCount) > requiredAccuracy)
        {
            ts->printf( cvtest::TS::LOG, "Points reprojected with a matrix Q and points reconstructed by triangulation are different, testcase %d\n", testcase);
            ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
@ -1581,7 +1581,7 @@ void CV_StereoCalibrationTest::run( int )
        {
            Mat error = newHomogeneousPoints2.row(i) * typedF * newHomogeneousPoints1.row(i).t();
            CV_Assert(error.rows == 1 && error.cols == 1);
-            if (norm(error) > constraintAccuracy)
+            if (cvtest::norm(error, NORM_L2) > constraintAccuracy)
            {
                ts->printf( cvtest::TS::LOG, "Epipolar constraint is violated after correctMatches, testcase %d\n", testcase);
                ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
--- a/modules/calib3d/test/test_cameracalibration_artificial.cpp
+++ b/modules/calib3d/test/test_cameracalibration_artificial.cpp
@ -204,7 +204,7 @@ protected:
            Rodrigues(rvecs[i], rmat);
            Rodrigues(rvecs_est[i], rmat_est);

-            if (norm(rmat_est, rmat) > eps* (norm(rmat) + dlt))
+            if (cvtest::norm(rmat_est, rmat, NORM_L2) > eps* (cvtest::norm(rmat, NORM_L2) + dlt))
            {
                if (err_count++ < errMsgNum)
                {
@ -213,7 +213,8 @@ protected:
                    else
                    {
                        ts->printf( cvtest::TS::LOG, "%d) Bad accuracy in returned rvecs (rotation matrs). Index = %d\n", r, i);
-                        ts->printf( cvtest::TS::LOG, "%d) norm(rot_mat_est - rot_mat_exp) = %f, norm(rot_mat_exp) = %f \n", r, norm(rmat_est, rmat), norm(rmat));
+                        ts->printf( cvtest::TS::LOG, "%d) norm(rot_mat_est - rot_mat_exp) = %f, norm(rot_mat_exp) = %f \n", r,
+                                   cvtest::norm(rmat_est, rmat, NORM_L2), cvtest::norm(rmat, NORM_L2));

                    }
                }
--- a/modules/calib3d/test/test_fundam.cpp
+++ b/modules/calib3d/test/test_fundam.cpp
@ -738,7 +738,7 @@ void CV_RodriguesTest::prepare_to_validation( int /*test_case_idx*/ )
    if( calc_jacobians )
    {
        //cvInvert( v2m_jac, m2v_jac, CV_SVD );
-        double nrm = norm(test_mat[REF_OUTPUT][3],CV_C);
+        double nrm = cvtest::norm(test_mat[REF_OUTPUT][3], CV_C);
        if( FLT_EPSILON < nrm && nrm < 1000 )
        {
            gemm( test_mat[OUTPUT][1], test_mat[OUTPUT][3],
@ -1409,8 +1409,8 @@ void CV_EssentialMatTest::prepare_to_validation( int test_case_idx )

    double* pose_prop1 = (double*)test_mat[REF_OUTPUT][2].data;
    double* pose_prop2 = (double*)test_mat[OUTPUT][2].data;
-    double terr1 = norm(Rt0.col(3) / norm(Rt0.col(3)) + test_mat[TEMP][3]);
-    double terr2 = norm(Rt0.col(3) / norm(Rt0.col(3)) - test_mat[TEMP][3]);
+    double terr1 = cvtest::norm(Rt0.col(3) / norm(Rt0.col(3)) + test_mat[TEMP][3], NORM_L2);
+    double terr2 = cvtest::norm(Rt0.col(3) / norm(Rt0.col(3)) - test_mat[TEMP][3], NORM_L2);
    Mat rvec;
    Rodrigues(Rt0.colRange(0, 3), rvec);
    pose_prop1[0] = 0;
--- a/modules/calib3d/test/test_homography.cpp
+++ b/modules/calib3d/test/test_homography.cpp
@ -119,7 +119,7 @@ bool CV_HomographyTest::check_matrix_size(const cv::Mat& H)

 bool CV_HomographyTest::check_matrix_diff(const cv::Mat& original, const cv::Mat& found, const int norm_type, double &diff)
 {
-    diff = cv::norm(original, found, norm_type);
+    diff = cvtest::norm(original, found, norm_type);
    return diff <= max_diff;
 }

--- a/modules/calib3d/test/test_solvepnp_ransac.cpp
+++ b/modules/calib3d/test/test_solvepnp_ransac.cpp
@ -299,8 +299,8 @@ TEST(DISABLED_Calib3d_SolvePnPRansac, concurrency)
        solvePnPRansac(object, image, camera_mat, dist_coef, rvec2, tvec2);
    }

-    double rnorm = cv::norm(rvec1, rvec2, NORM_INF);
-    double tnorm = cv::norm(tvec1, tvec2, NORM_INF);
+    double rnorm = cvtest::norm(rvec1, rvec2, NORM_INF);
+    double tnorm = cvtest::norm(tvec1, tvec2, NORM_INF);

    EXPECT_LT(rnorm, 1e-6);
    EXPECT_LT(tnorm, 1e-6);
--- a/modules/calib3d/test/test_stereomatching.cpp
+++ b/modules/calib3d/test/test_stereomatching.cpp
@ -279,7 +279,7 @@ float dispRMS( const Mat& computedDisp, const Mat& groundTruthDisp, const Mat& m
        checkTypeAndSizeOfMask( mask, sz );
        pointsCount = countNonZero(mask);
    }
-    return 1.f/sqrt((float)pointsCount) * (float)norm(computedDisp, groundTruthDisp, NORM_L2, mask);
+    return 1.f/sqrt((float)pointsCount) * (float)cvtest::norm(computedDisp, groundTruthDisp, NORM_L2, mask);
 }

 /*
--- a/modules/calib3d/test/test_undistort_points.cpp
+++ b/modules/calib3d/test/test_undistort_points.cpp
@ -84,7 +84,7 @@ void CV_UndistortTest::run(int /* start_from */)
        Mat p;
        perspectiveTransform(undistortedPoints, p, intrinsics);
        undistortedPoints = p;
-        double diff = norm(Mat(realUndistortedPoints), undistortedPoints);
+        double diff = cvtest::norm(Mat(realUndistortedPoints), undistortedPoints, NORM_L2);
        if (diff > thresh)
        {
            ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY);
--- a/modules/contrib/doc/facerec/facerec_api.rst
+++ b/modules/contrib/doc/facerec/facerec_api.rst
@ -75,7 +75,7 @@ Moreover every :ocv:class:`FaceRecognizer` supports the:
 Setting the Thresholds
 +++++++++++++++++++++++

-Sometimes you run into the situation, when you want to apply a threshold on the prediction. A common scenario in face recognition is to tell, wether a face belongs to the training dataset or if it is unknown. You might wonder, why there's no public API in :ocv:class:`FaceRecognizer` to set the threshold for the prediction, but rest assured: It's supported. It just means there's no generic way in an abstract class to provide an interface for setting/getting the thresholds of *every possible* :ocv:class:`FaceRecognizer` algorithm. The appropriate place to set the thresholds is in the constructor of the specific :ocv:class:`FaceRecognizer` and since every :ocv:class:`FaceRecognizer` is a :ocv:class:`Algorithm` (see above), you can get/set the thresholds at runtime!
+Sometimes you run into the situation, when you want to apply a threshold on the prediction. A common scenario in face recognition is to tell, whether a face belongs to the training dataset or if it is unknown. You might wonder, why there's no public API in :ocv:class:`FaceRecognizer` to set the threshold for the prediction, but rest assured: It's supported. It just means there's no generic way in an abstract class to provide an interface for setting/getting the thresholds of *every possible* :ocv:class:`FaceRecognizer` algorithm. The appropriate place to set the thresholds is in the constructor of the specific :ocv:class:`FaceRecognizer` and since every :ocv:class:`FaceRecognizer` is a :ocv:class:`Algorithm` (see above), you can get/set the thresholds at runtime!

 Here is an example of setting a threshold for the Eigenfaces method, when creating the model:

--- a/modules/contrib/doc/facerec/tutorial/facerec_gender_classification.rst
+++ b/modules/contrib/doc/facerec/tutorial/facerec_gender_classification.rst
@ -71,7 +71,7 @@ You really don't want to create the CSV file by hand. And you really don't want
 Fisherfaces for Gender Classification
 --------------------------------------

-If you want to decide wether a person is *male* or *female*, you have to learn the discriminative features of both classes. The Eigenfaces method is based on the Principal Component Analysis, which is an unsupervised statistical model and not suitable for this task. Please see the Face Recognition tutorial for insights into the algorithms. The Fisherfaces instead yields a class-specific linear projection, so it is much better suited for the gender classification task. `http://www.bytefish.de/blog/gender_classification <http://www.bytefish.de/blog/gender_classification>`_  shows the recognition rate of the Fisherfaces method for gender classification.
+If you want to decide whether a person is *male* or *female*, you have to learn the discriminative features of both classes. The Eigenfaces method is based on the Principal Component Analysis, which is an unsupervised statistical model and not suitable for this task. Please see the Face Recognition tutorial for insights into the algorithms. The Fisherfaces instead yields a class-specific linear projection, so it is much better suited for the gender classification task. `http://www.bytefish.de/blog/gender_classification <http://www.bytefish.de/blog/gender_classification>`_  shows the recognition rate of the Fisherfaces method for gender classification.

 The Fisherfaces method achieves a 98% recognition rate in a subject-independent cross-validation. A subject-independent cross-validation means *images of the person under test are never used for learning the model*. And could you believe it: you can simply use the facerec_fisherfaces demo, that's inlcuded in OpenCV.

--- a/modules/core/doc/core.rst
+++ b/modules/core/doc/core.rst
@ -16,3 +16,4 @@ core. The Core Functionality
    clustering
    utility_and_system_functions_and_macros
    opengl_interop
+    ipp_async_converters
--- a/modules/core/doc/ipp_async_converters.rst
+++ b/modules/core/doc/ipp_async_converters.rst
@ -0,0 +1,72 @@
+Intel® IPP Asynchronous C/C++ Converters
+========================================
+
+.. highlight:: cpp
+
+General Information
+-------------------
+
+This section describes conversion between OpenCV and the `Intel® IPP Asynchronous C/C++ <http://software.intel.com/en-us/intel-ipp-preview>`_ library.
+The `Getting Started Guide <http://registrationcenter.intel.com/irc_nas/3727/ipp_async_get_started.htm>`_ helps you install the library and configure the header and library build paths.
+
+hpp::getHpp
+-----------
+Create ``hppiMatrix`` from ``Mat``.
+
+.. ocv:function:: hppiMatrix* hpp::getHpp(const Mat& src, hppAccel accel)
+
+    :param src: input matrix.
+    :param accel: accelerator instance. Supported types:
+
+            * **HPP_ACCEL_TYPE_CPU** - accelerated by optimized CPU instructions.
+
+            * **HPP_ACCEL_TYPE_GPU** - accelerated by GPU programmable units or fixed-function accelerators.
+
+            * **HPP_ACCEL_TYPE_ANY** - any acceleration or no acceleration available.
+
+This function allocates and initializes the ``hppiMatrix`` that has the same size and type as the input matrix, and returns the ``hppiMatrix*``.
+
+If you want to use zero-copy for GPU, you should have 4KB-aligned matrix data. See `hppiCreateSharedMatrix <http://software.intel.com/ru-ru/node/501697>`_ for details.
+
+Supports ``CV_8U``, ``CV_16U``, ``CV_16S``, ``CV_32S``, ``CV_32F``, ``CV_64F``.
+
+.. note:: The image-buffer pointer of the ``hppiMatrix`` refers to ``src.data`` in system memory. Control the lifetime of the matrix and do not change its data unless there is a special need.
+.. seealso:: :ref:`howToUseIPPAconversion`, :ocv:func:`hpp::getMat`
+
+
+hpp::getMat
+-----------
+Create ``Mat`` from ``hppiMatrix``.
+
+.. ocv:function:: Mat hpp::getMat(hppiMatrix* src, hppAccel accel, int cn)
+
+    :param src: input hppiMatrix.
+
+    :param accel: accelerator instance (see :ocv:func:`hpp::getHpp` for the list of acceleration framework types).
+
+    :param cn: number of channels.
+
+This function allocates and initializes the ``Mat`` that has the same size and type as the input matrix.
+Supports ``CV_8U``, ``CV_16U``, ``CV_16S``, ``CV_32S``, ``CV_32F``, ``CV_64F``.
+
+.. seealso:: :ref:`howToUseIPPAconversion`, :ocv:func:`hpp::copyHppToMat`, :ocv:func:`hpp::getHpp`.
+
+
+hpp::copyHppToMat
+-----------------
+Convert ``hppiMatrix`` to ``Mat``.
+
+.. ocv:function:: void hpp::copyHppToMat(hppiMatrix* src, Mat& dst, hppAccel accel, int cn)
+
+    :param src: input hppiMatrix.
+
+    :param dst: output matrix.
+
+    :param accel: accelerator instance (see :ocv:func:`hpp::getHpp` for the list of acceleration framework types).
+
+    :param cn: number of channels.
+
+This function allocates and initializes a new matrix (if needed) that has the same size and type as the input matrix.
+Supports ``CV_8U``, ``CV_16U``, ``CV_16S``, ``CV_32S``, ``CV_32F``, ``CV_64F``.
+
+.. seealso:: :ref:`howToUseIPPAconversion`, :ocv:func:`hpp::getMat`, :ocv:func:`hpp::getHpp`.
--- a/modules/core/doc/old_basic_structures.rst
+++ b/modules/core/doc/old_basic_structures.rst
@ -1387,7 +1387,7 @@ description rewritten using

    IplImage* color_img = cvCreateImage(cvSize(320,240), IPL_DEPTH_8U, 3);
    IplImage gray_img_hdr, *gray_img;
-    gray_img = (IplImage*)cvReshapeND(color_img, &gray_img_hdr, 1, 0, 0);
+    gray_img = (IplImage*)cvReshapeMatND(color_img, sizeof(gray_img_hdr), &gray_img_hdr, 1, 0, 0);

    ...

@ -1395,6 +1395,18 @@ description rewritten using
    int size[] = { 2, 2, 2 };
    CvMatND* mat = cvCreateMatND(3, size, CV_32F);
    CvMat row_header, *row;
+    row = (CvMat*)cvReshapeMatND(mat, sizeof(row_header), &row_header, 0, 1, 0);
+
+..
+
+In C, the header file for this function includes a convenient macro ``cvReshapeND`` that does away with the ``sizeof_header`` parameter. So, the lines containing the call to ``cvReshapeMatND`` in the examples may be replaced as follows:
+
+::
+
+    gray_img = (IplImage*)cvReshapeND(color_img, &gray_img_hdr, 1, 0, 0);
+
+    ...
+
    row = (CvMat*)cvReshapeND(mat, &row_header, 0, 1, 0);

 ..
--- a/modules/core/include/opencv2/core/ippasync.hpp
+++ b/modules/core/include/opencv2/core/ippasync.hpp
@ -0,0 +1,105 @@
+#ifndef __OPENCV_CORE_IPPASYNC_HPP__
+#define __OPENCV_CORE_IPPASYNC_HPP__
+
+#ifdef HAVE_IPP_A
+
+#include "opencv2/core.hpp"
+#include <ipp_async_op.h>
+#include <ipp_async_accel.h>
+
+namespace cv
+{
+
+namespace hpp
+{
+    // Convert an OpenCV type code to the matching HPP_DATA_TYPE_* value; only the depth of cvType is examined, and CV_Assert fires for any depth not listed below.
+    inline int toHppType(const int cvType)
+    {
+        int depth = CV_MAT_DEPTH(cvType); // depth part of the type code
+        int hppType = depth == CV_8U ? HPP_DATA_TYPE_8U :
+                     depth == CV_16U ? HPP_DATA_TYPE_16U :
+                     depth == CV_16S ? HPP_DATA_TYPE_16S :
+                     depth == CV_32S ? HPP_DATA_TYPE_32S :
+                     depth == CV_32F ? HPP_DATA_TYPE_32F :
+                     depth == CV_64F ? HPP_DATA_TYPE_64F : -1; // -1 marks a depth with no mapping
+        CV_Assert( hppType >= 0 ); // rejects the -1 sentinel
+        return hppType;
+    }
+
+    // Convert an hppDataType value to the matching OpenCV depth constant (CV_8U ... CV_64F, no channel information); CV_Assert fires for any value not listed below.
+    inline int toCvType(const int hppType)
+    {
+        int cvType = hppType == HPP_DATA_TYPE_8U ? CV_8U :
+                    hppType == HPP_DATA_TYPE_16U ? CV_16U :
+                    hppType == HPP_DATA_TYPE_16S ? CV_16S :
+                    hppType == HPP_DATA_TYPE_32S ? CV_32S :
+                    hppType == HPP_DATA_TYPE_32F ? CV_32F :
+                    hppType == HPP_DATA_TYPE_64F ? CV_64F : -1; // -1 marks a type with no mapping
+        CV_Assert( cvType >= 0 ); // rejects the -1 sentinel
+        return cvType;
+    }
+
+    // Copy an hppiMatrix into dst, re-created as height x (width/cn) with cn channels; a NULL src releases dst. Asserts on an HPP error status, on cn <= 0, or on a width not divisible by cn.
+    inline void copyHppToMat(hppiMatrix* src, Mat& dst, hppAccel accel, int cn)
+    {
+        hppDataType type;
+        hpp32u width, height;
+        hppStatus sts;
+
+        if (src == NULL)
+            return dst.release();
+
+        sts = hppiInquireMatrix(src, &type, &width, &height);
+        CV_Assert( sts == HPP_STATUS_NO_ERROR);
+
+        // Validate cn before it is used as a divisor or handed to CV_MAKETYPE.
+        CV_Assert(cn > 0 && width%cn == 0);
+        width /= cn; // queried width is divided by cn to obtain the Mat column count
+
+        int matType = CV_MAKETYPE(toCvType(type), cn);
+
+        dst.create((int)height, (int)width, (int)matType);
+
+        // Widen before multiplying so the byte count cannot wrap in 32-bit arithmetic.
+        size_t newSize = (size_t)height * (size_t)dst.step;
+
+        sts = hppiGetMatrixData(accel,src,(hpp32u)(dst.step),dst.data,&newSize);
+        CV_Assert( sts == HPP_STATUS_NO_ERROR);
+    }
+
+    // Create a cv::Mat from an hppiMatrix by delegating to copyHppToMat; accel and cn (number of channels) are forwarded unchanged and the resulting matrix is returned by value.
+    inline Mat getMat(hppiMatrix* src, hppAccel accel, int cn)
+    {
+        Mat dst; // starts empty; populated by copyHppToMat
+        copyHppToMat(src, dst, accel, cn);
+        return dst;
+    }
+
+    // Create an hppiMatrix over src.data (width = cols*channels, pitch = src.step): uses hppiCreateSharedMatrix when a non-CPU accelerator reports usable alloc params and the buffer is 4096-byte aligned with a matching pitch, otherwise hppiCreateMatrix. Asserts that src has data and a supported depth.
+    inline hppiMatrix* getHpp(const Mat& src, hppAccel accel)
+    {
+        int htype = toHppType(src.type());
+        int cn = src.channels();
+
+        CV_Assert(src.data);
+        hppAccelType accelType = hppQueryAccelType(accel);
+
+        if (accelType!=HPP_ACCEL_TYPE_CPU)
+        {
+            hpp32u pitch = 0, size = 0; // stay 0 (shared path skipped) if the unchecked query below writes nothing
+            hppQueryMatrixAllocParams(accel, src.cols*cn, src.rows, htype, &pitch, &size);
+            if (pitch!=0 && size!=0)
+                if ((size_t)(src.data)%4096==0 && pitch==(hpp32u)(src.step)) // size_t keeps the whole address; a cast to int truncates 64-bit pointers
+                {
+                    return hppiCreateSharedMatrix(htype, src.cols*cn, src.rows, src.data, pitch, size);
+                }
+        }
+
+        return hppiCreateMatrix(htype, src.cols*cn, src.rows, src.data, (hpp32s)(src.step));
+    }
+
+}}
+
+#endif
+
+#endif
--- a/modules/core/include/opencv2/core/private.hpp
+++ b/modules/core/include/opencv2/core/private.hpp
@ -210,13 +210,21 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un
 \****************************************************************************************/

 #ifdef HAVE_IPP
-#  include "ipp.h"
+#  ifdef HAVE_IPP_ICV_ONLY
+#    include "ippicv.h"
+#    include "ippicv_fn_map.h"
+#  else
+#    include "ipp.h"
+#  endif
+#  define IPP_VERSION_X100 (IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR)

 static inline IppiSize ippiSize(int width, int height)
 {
    IppiSize size = { width, height };
    return size;
 }
+#else
+#  define IPP_VERSION_X100 0
 #endif

 #ifndef IPPI_CALL
--- a/modules/core/perf/perf_dft.cpp
+++ b/modules/core/perf/perf_dft.cpp
@ -22,5 +22,5 @@ PERF_TEST_P(Size_MatType, dft, TEST_MATS_DFT)

    TEST_CYCLE() dft(src, dst);

-    SANITY_CHECK(dst, 1e-5);
+    SANITY_CHECK(dst, 1e-5, ERROR_RELATIVE);
 }
--- a/modules/core/perf/perf_stat.cpp
+++ b/modules/core/perf/perf_stat.cpp
@ -65,8 +65,8 @@ PERF_TEST_P(Size_MatType, meanStdDev, TYPICAL_MATS)

    TEST_CYCLE() meanStdDev(src, mean, dev);

-    SANITY_CHECK(mean, 1e-6);
-    SANITY_CHECK(dev, 1e-6);
+    SANITY_CHECK(mean, 1e-5, ERROR_RELATIVE);
+    SANITY_CHECK(dev, 1e-5, ERROR_RELATIVE);
 }

 PERF_TEST_P(Size_MatType, meanStdDev_mask, TYPICAL_MATS)
--- a/modules/core/src/arithm.cpp
+++ b/modules/core/src/arithm.cpp
@ -52,18 +52,6 @@
 namespace cv
 {

-#if ARITHM_USE_IPP
-struct IPPArithmInitializer
-{
-    IPPArithmInitializer(void)
-    {
-        ippStaticInit();
-    }
-};
-
-IPPArithmInitializer ippArithmInitializer;
-#endif
-
 struct NOP {};

 #if CV_SSE2
@ -470,9 +458,12 @@ static void add8u( const uchar* src1, size_t step1,
                   const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size sz, void* )
 {
-    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-           ippiAdd_8u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0),
-           (vBinOp<uchar, OpAdd<uchar>, IF_SIMD(VAdd<uchar>)>(src1, step1, src2, step2, dst, step, sz)));
+#if (ARITHM_USE_IPP == 1)
+    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
+    if (0 <= ippiAdd_8u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0))
+        return;
+#endif
+    (vBinOp<uchar, OpAdd<uchar>, IF_SIMD(VAdd<uchar>)>(src1, step1, src2, step2, dst, step, sz));
 }

 static void add8s( const schar* src1, size_t step1,
@ -486,18 +477,24 @@ static void add16u( const ushort* src1, size_t step1,
                    const ushort* src2, size_t step2,
                    ushort* dst, size_t step, Size sz, void* )
 {
-    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-           ippiAdd_16u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0),
-           (vBinOp<ushort, OpAdd<ushort>, IF_SIMD(VAdd<ushort>)>(src1, step1, src2, step2, dst, step, sz)));
+#if (ARITHM_USE_IPP == 1)
+    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
+    if (0 <= ippiAdd_16u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0))
+        return;
+#endif
+    (vBinOp<ushort, OpAdd<ushort>, IF_SIMD(VAdd<ushort>)>(src1, step1, src2, step2, dst, step, sz));
 }

 static void add16s( const short* src1, size_t step1,
                    const short* src2, size_t step2,
                    short* dst, size_t step, Size sz, void* )
 {
-    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-           ippiAdd_16s_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0),
-           (vBinOp<short, OpAdd<short>, IF_SIMD(VAdd<short>)>(src1, step1, src2, step2, dst, step, sz)));
+#if (ARITHM_USE_IPP == 1)
+    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
+    if (0 <= ippiAdd_16s_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0))
+        return;
+#endif
+    (vBinOp<short, OpAdd<short>, IF_SIMD(VAdd<short>)>(src1, step1, src2, step2, dst, step, sz));
 }

 static void add32s( const int* src1, size_t step1,
@ -511,9 +508,12 @@ static void add32f( const float* src1, size_t step1,
                    const float* src2, size_t step2,
                    float* dst, size_t step, Size sz, void* )
 {
-    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-           ippiAdd_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz),
-           (vBinOp32<float, OpAdd<float>, IF_SIMD(VAdd<float>)>(src1, step1, src2, step2, dst, step, sz)));
+#if (ARITHM_USE_IPP == 1)
+    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
+    if (0 <= ippiAdd_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+        return;
+#endif
+    (vBinOp32<float, OpAdd<float>, IF_SIMD(VAdd<float>)>(src1, step1, src2, step2, dst, step, sz));
 }

 static void add64f( const double* src1, size_t step1,
@ -527,9 +527,12 @@ static void sub8u( const uchar* src1, size_t step1,
                   const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size sz, void* )
 {
-    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-           ippiSub_8u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0),
-           (vBinOp<uchar, OpSub<uchar>, IF_SIMD(VSub<uchar>)>(src1, step1, src2, step2, dst, step, sz)));
+#if (ARITHM_USE_IPP == 1)
+    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
+    if (0 <= ippiSub_8u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0))
+        return;
+#endif
+    (vBinOp<uchar, OpSub<uchar>, IF_SIMD(VSub<uchar>)>(src1, step1, src2, step2, dst, step, sz));
 }

 static void sub8s( const schar* src1, size_t step1,
@ -543,18 +546,24 @@ static void sub16u( const ushort* src1, size_t step1,
                    const ushort* src2, size_t step2,
                    ushort* dst, size_t step, Size sz, void* )
 {
-    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-           ippiSub_16u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0),
-           (vBinOp<ushort, OpSub<ushort>, IF_SIMD(VSub<ushort>)>(src1, step1, src2, step2, dst, step, sz)));
+#if (ARITHM_USE_IPP == 1)
+    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
+    if (0 <= ippiSub_16u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0))
+        return;
+#endif
+    (vBinOp<ushort, OpSub<ushort>, IF_SIMD(VSub<ushort>)>(src1, step1, src2, step2, dst, step, sz));
 }

 static void sub16s( const short* src1, size_t step1,
                    const short* src2, size_t step2,
                    short* dst, size_t step, Size sz, void* )
 {
-    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-           ippiSub_16s_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0),
-           (vBinOp<short, OpSub<short>, IF_SIMD(VSub<short>)>(src1, step1, src2, step2, dst, step, sz)));
+#if (ARITHM_USE_IPP == 1)
+    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
+    if (0 <= ippiSub_16s_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0))
+        return;
+#endif
+    (vBinOp<short, OpSub<short>, IF_SIMD(VSub<short>)>(src1, step1, src2, step2, dst, step, sz));
 }

 static void sub32s( const int* src1, size_t step1,
@ -568,9 +577,12 @@ static void sub32f( const float* src1, size_t step1,
                   const float* src2, size_t step2,
                   float* dst, size_t step, Size sz, void* )
 {
-    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-           ippiSub_32f_C1R(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz),
-           (vBinOp32<float, OpSub<float>, IF_SIMD(VSub<float>)>(src1, step1, src2, step2, dst, step, sz)));
+#if (ARITHM_USE_IPP == 1)
+    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
+    if (0 <= ippiSub_32f_C1R(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz))
+        return;
+#endif
+    (vBinOp32<float, OpSub<float>, IF_SIMD(VSub<float>)>(src1, step1, src2, step2, dst, step, sz));
 }

 static void sub64f( const double* src1, size_t step1,
@ -588,26 +600,23 @@ static void max8u( const uchar* src1, size_t step1,
                   uchar* dst, size_t step, Size sz, void* )
 {
 #if (ARITHM_USE_IPP == 1)
-  {
    uchar* s1 = (uchar*)src1;
    uchar* s2 = (uchar*)src2;
    uchar* d  = dst;
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    for(int i = 0; i < sz.height; i++)
+    int i = 0;
+    for(; i < sz.height; i++)
    {
-      ippsMaxEvery_8u(s1, s2, d, sz.width);
-      s1 += step1;
-      s2 += step2;
-      d  += step;
+        if (0 > ippsMaxEvery_8u(s1, s2, d, sz.width))
+            break;
+        s1 += step1;
+        s2 += step2;
+        d  += step;
    }
-  }
-#else
-  vBinOp<uchar, OpMax<uchar>, IF_SIMD(VMax<uchar>)>(src1, step1, src2, step2, dst, step, sz);
+    if (i == sz.height)
+        return;
 #endif
-
-//    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-//           ippiMaxEvery_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
-//           (vBinOp8<uchar, OpMax<uchar>, IF_SIMD(_VMax8u)>(src1, step1, src2, step2, dst, step, sz)));
+    vBinOp<uchar, OpMax<uchar>, IF_SIMD(VMax<uchar>)>(src1, step1, src2, step2, dst, step, sz);
 }

 static void max8s( const schar* src1, size_t step1,
@ -622,26 +631,23 @@ static void max16u( const ushort* src1, size_t step1,
                    ushort* dst, size_t step, Size sz, void* )
 {
 #if (ARITHM_USE_IPP == 1)
-  {
    ushort* s1 = (ushort*)src1;
    ushort* s2 = (ushort*)src2;
    ushort* d  = dst;
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    for(int i = 0; i < sz.height; i++)
+    int i = 0;
+    for(; i < sz.height; i++)
    {
-      ippsMaxEvery_16u(s1, s2, d, sz.width);
-      s1 = (ushort*)((uchar*)s1 + step1);
-      s2 = (ushort*)((uchar*)s2 + step2);
-      d  = (ushort*)((uchar*)d + step);
+        if (0 > ippsMaxEvery_16u(s1, s2, d, sz.width))
+            break;
+        s1 = (ushort*)((uchar*)s1 + step1);
+        s2 = (ushort*)((uchar*)s2 + step2);
+        d  = (ushort*)((uchar*)d + step);
    }
-  }
-#else
-  vBinOp<ushort, OpMax<ushort>, IF_SIMD(VMax<ushort>)>(src1, step1, src2, step2, dst, step, sz);
+    if (i == sz.height)
+        return;
 #endif
-
-//    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-//           ippiMaxEvery_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
-//           (vBinOp16<ushort, OpMax<ushort>, IF_SIMD(_VMax16u)>(src1, step1, src2, step2, dst, step, sz)));
+    vBinOp<ushort, OpMax<ushort>, IF_SIMD(VMax<ushort>)>(src1, step1, src2, step2, dst, step, sz);
 }

 static void max16s( const short* src1, size_t step1,
@ -663,25 +669,23 @@ static void max32f( const float* src1, size_t step1,
                    float* dst, size_t step, Size sz, void* )
 {
 #if (ARITHM_USE_IPP == 1)
-  {
    float* s1 = (float*)src1;
    float* s2 = (float*)src2;
    float* d  = dst;
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    for(int i = 0; i < sz.height; i++)
+    int i = 0;
+    for(; i < sz.height; i++)
    {
-      ippsMaxEvery_32f(s1, s2, d, sz.width);
-      s1 = (float*)((uchar*)s1 + step1);
-      s2 = (float*)((uchar*)s2 + step2);
-      d  = (float*)((uchar*)d + step);
+        if (0 > ippsMaxEvery_32f(s1, s2, d, sz.width))
+            break;
+        s1 = (float*)((uchar*)s1 + step1);
+        s2 = (float*)((uchar*)s2 + step2);
+        d  = (float*)((uchar*)d + step);
    }
-  }
-#else
-  vBinOp32<float, OpMax<float>, IF_SIMD(VMax<float>)>(src1, step1, src2, step2, dst, step, sz);
+    if (i == sz.height)
+        return;
 #endif
-//    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-//           ippiMaxEvery_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
-//           (vBinOp32f<OpMax<float>, IF_SIMD(_VMax32f)>(src1, step1, src2, step2, dst, step, sz)));
+    vBinOp32<float, OpMax<float>, IF_SIMD(VMax<float>)>(src1, step1, src2, step2, dst, step, sz);
 }

 static void max64f( const double* src1, size_t step1,
@ -696,26 +700,23 @@ static void min8u( const uchar* src1, size_t step1,
                   uchar* dst, size_t step, Size sz, void* )
 {
 #if (ARITHM_USE_IPP == 1)
-  {
    uchar* s1 = (uchar*)src1;
    uchar* s2 = (uchar*)src2;
    uchar* d  = dst;
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    for(int i = 0; i < sz.height; i++)
+    int i = 0;
+    for(; i < sz.height; i++)
    {
-      ippsMinEvery_8u(s1, s2, d, sz.width);
-      s1 += step1;
-      s2 += step2;
-      d  += step;
+        if (0 > ippsMinEvery_8u(s1, s2, d, sz.width))
+            break;
+        s1 += step1;
+        s2 += step2;
+        d  += step;
    }
-  }
-#else
-  vBinOp<uchar, OpMin<uchar>, IF_SIMD(VMin<uchar>)>(src1, step1, src2, step2, dst, step, sz);
+    if (i == sz.height)
+        return;
 #endif
-
-//    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-//           ippiMinEvery_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
-//           (vBinOp8<uchar, OpMin<uchar>, IF_SIMD(_VMin8u)>(src1, step1, src2, step2, dst, step, sz)));
+    vBinOp<uchar, OpMin<uchar>, IF_SIMD(VMin<uchar>)>(src1, step1, src2, step2, dst, step, sz);
 }

 static void min8s( const schar* src1, size_t step1,
@ -730,26 +731,23 @@ static void min16u( const ushort* src1, size_t step1,
                    ushort* dst, size_t step, Size sz, void* )
 {
 #if (ARITHM_USE_IPP == 1)
-  {
    ushort* s1 = (ushort*)src1;
    ushort* s2 = (ushort*)src2;
    ushort* d  = dst;
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    for(int i = 0; i < sz.height; i++)
+    int i = 0;
+    for(; i < sz.height; i++)
    {
-      ippsMinEvery_16u(s1, s2, d, sz.width);
-      s1 = (ushort*)((uchar*)s1 + step1);
-      s2 = (ushort*)((uchar*)s2 + step2);
-      d  = (ushort*)((uchar*)d + step);
+        if (0 > ippsMinEvery_16u(s1, s2, d, sz.width))
+            break;
+        s1 = (ushort*)((uchar*)s1 + step1);
+        s2 = (ushort*)((uchar*)s2 + step2);
+        d  = (ushort*)((uchar*)d + step);
    }
-  }
-#else
-  vBinOp<ushort, OpMin<ushort>, IF_SIMD(VMin<ushort>)>(src1, step1, src2, step2, dst, step, sz);
+    if (i == sz.height)
+        return;
 #endif
-
-//    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-//           ippiMinEvery_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
-//           (vBinOp16<ushort, OpMin<ushort>, IF_SIMD(_VMin16u)>(src1, step1, src2, step2, dst, step, sz)));
+    vBinOp<ushort, OpMin<ushort>, IF_SIMD(VMin<ushort>)>(src1, step1, src2, step2, dst, step, sz);
 }

 static void min16s( const short* src1, size_t step1,
@ -771,25 +769,23 @@ static void min32f( const float* src1, size_t step1,
                    float* dst, size_t step, Size sz, void* )
 {
 #if (ARITHM_USE_IPP == 1)
-  {
    float* s1 = (float*)src1;
    float* s2 = (float*)src2;
    float* d  = dst;
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    for(int i = 0; i < sz.height; i++)
+    int i = 0;
+    for(; i < sz.height; i++)
    {
-      ippsMinEvery_32f(s1, s2, d, sz.width);
-      s1 = (float*)((uchar*)s1 + step1);
-      s2 = (float*)((uchar*)s2 + step2);
-      d  = (float*)((uchar*)d + step);
+        if (0 > ippsMinEvery_32f(s1, s2, d, sz.width))
+            break;
+        s1 = (float*)((uchar*)s1 + step1);
+        s2 = (float*)((uchar*)s2 + step2);
+        d  = (float*)((uchar*)d + step);
    }
-  }
-#else
-  vBinOp32<float, OpMin<float>, IF_SIMD(VMin<float>)>(src1, step1, src2, step2, dst, step, sz);
+    if (i == sz.height)
+        return;
 #endif
-//    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-//           ippiMinEvery_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
-//           (vBinOp32f<OpMin<float>, IF_SIMD(_VMin32f)>(src1, step1, src2, step2, dst, step, sz)));
+    vBinOp32<float, OpMin<float>, IF_SIMD(VMin<float>)>(src1, step1, src2, step2, dst, step, sz);
 }

 static void min64f( const double* src1, size_t step1,
@ -803,9 +799,12 @@ static void absdiff8u( const uchar* src1, size_t step1,
                       const uchar* src2, size_t step2,
                       uchar* dst, size_t step, Size sz, void* )
 {
-    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-           ippiAbsDiff_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz),
-           (vBinOp<uchar, OpAbsDiff<uchar>, IF_SIMD(VAbsDiff<uchar>)>(src1, step1, src2, step2, dst, step, sz)));
+#if (ARITHM_USE_IPP == 1)
+    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
+    if (0 <= ippiAbsDiff_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+        return;
+#endif
+    (vBinOp<uchar, OpAbsDiff<uchar>, IF_SIMD(VAbsDiff<uchar>)>(src1, step1, src2, step2, dst, step, sz));
 }

 static void absdiff8s( const schar* src1, size_t step1,
@ -819,9 +818,12 @@ static void absdiff16u( const ushort* src1, size_t step1,
                        const ushort* src2, size_t step2,
                        ushort* dst, size_t step, Size sz, void* )
 {
-    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-           ippiAbsDiff_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz),
-           (vBinOp<ushort, OpAbsDiff<ushort>, IF_SIMD(VAbsDiff<ushort>)>(src1, step1, src2, step2, dst, step, sz)));
+#if (ARITHM_USE_IPP == 1)
+    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
+    if (0 <= ippiAbsDiff_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+        return;
+#endif
+    (vBinOp<ushort, OpAbsDiff<ushort>, IF_SIMD(VAbsDiff<ushort>)>(src1, step1, src2, step2, dst, step, sz));
 }

 static void absdiff16s( const short* src1, size_t step1,
@ -842,9 +844,12 @@ static void absdiff32f( const float* src1, size_t step1,
                        const float* src2, size_t step2,
                        float* dst, size_t step, Size sz, void* )
 {
-    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-           ippiAbsDiff_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz),
-           (vBinOp32<float, OpAbsDiff<float>, IF_SIMD(VAbsDiff<float>)>(src1, step1, src2, step2, dst, step, sz)));
+#if (ARITHM_USE_IPP == 1)
+    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
+    if (0 <= ippiAbsDiff_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+        return;
+#endif
+    (vBinOp32<float, OpAbsDiff<float>, IF_SIMD(VAbsDiff<float>)>(src1, step1, src2, step2, dst, step, sz));
 }

 static void absdiff64f( const double* src1, size_t step1,
@ -859,36 +864,48 @@ static void and8u( const uchar* src1, size_t step1,
                   const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size sz, void* )
 {
-    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-           ippiAnd_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz),
-           (vBinOp<uchar, OpAnd<uchar>, IF_SIMD(VAnd<uchar>)>(src1, step1, src2, step2, dst, step, sz)));
+#if (ARITHM_USE_IPP == 1)
+    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
+    if (0 <= ippiAnd_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+        return;
+#endif
+    (vBinOp<uchar, OpAnd<uchar>, IF_SIMD(VAnd<uchar>)>(src1, step1, src2, step2, dst, step, sz));
 }

 static void or8u( const uchar* src1, size_t step1,
                  const uchar* src2, size_t step2,
                  uchar* dst, size_t step, Size sz, void* )
 {
-    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-           ippiOr_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz),
-           (vBinOp<uchar, OpOr<uchar>, IF_SIMD(VOr<uchar>)>(src1, step1, src2, step2, dst, step, sz)));
+#if (ARITHM_USE_IPP == 1)
+    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
+    if (0 <= ippiOr_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+        return;
+#endif
+    (vBinOp<uchar, OpOr<uchar>, IF_SIMD(VOr<uchar>)>(src1, step1, src2, step2, dst, step, sz));
 }

 static void xor8u( const uchar* src1, size_t step1,
                   const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size sz, void* )
 {
-    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-           ippiXor_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz),
-           (vBinOp<uchar, OpXor<uchar>, IF_SIMD(VXor<uchar>)>(src1, step1, src2, step2, dst, step, sz)));
+#if (ARITHM_USE_IPP == 1)
+    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
+    if (0 <= ippiXor_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+        return;
+#endif
+    (vBinOp<uchar, OpXor<uchar>, IF_SIMD(VXor<uchar>)>(src1, step1, src2, step2, dst, step, sz));
 }

 static void not8u( const uchar* src1, size_t step1,
                   const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size sz, void* )
 {
-    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); (void *)src2;
-           ippiNot_8u_C1R(src1, (int)step1, dst, (int)step, (IppiSize&)sz),
-           (vBinOp<uchar, OpNot<uchar>, IF_SIMD(VNot<uchar>)>(src1, step1, src2, step2, dst, step, sz)));
+#if (ARITHM_USE_IPP == 1)
+    fixSteps(sz, sizeof(dst[0]), step1, step2, step); (void *)src2;
+    if (0 <= ippiNot_8u_C1R(src1, (int)step1, dst, (int)step, (IppiSize&)sz))
+        return;
+#endif
+    (vBinOp<uchar, OpNot<uchar>, IF_SIMD(VNot<uchar>)>(src1, step1, src2, step2, dst, step, sz));
 }

 /****************************************************************************************\
@ -2369,7 +2386,7 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
    if( op  >= 0 )
    {
        fixSteps(size, sizeof(dst[0]), step1, step2, step);
-        if( ippiCompare_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 )
+        if (0 <= ippiCompare_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op))
            return;
    }
 #endif
@ -2452,7 +2469,7 @@ static void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t
    if( op  >= 0 )
    {
        fixSteps(size, sizeof(dst[0]), step1, step2, step);
-        if( ippiCompare_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 )
+        if (0 <= ippiCompare_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op))
            return;
    }
 #endif
@ -2467,7 +2484,7 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
    if( op  > 0 )
    {
        fixSteps(size, sizeof(dst[0]), step1, step2, step);
-        if( ippiCompare_16s_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 )
+        if (0 <= ippiCompare_16s_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op))
            return;
    }
 #endif
@ -2573,7 +2590,7 @@ static void cmp32f(const float* src1, size_t step1, const float* src2, size_t st
    if( op  >= 0 )
    {
        fixSteps(size, sizeof(dst[0]), step1, step2, step);
-        if( ippiCompare_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 )
+        if (0 <= ippiCompare_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op))
            return;
    }
 #endif
@ -2618,53 +2635,37 @@ static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, in
 {
    const ocl::Device& dev = ocl::Device::getDefault();
    bool doubleSupport = dev.doubleFPConfig() > 0;
-    int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
-    int type2 = _src2.type();
-
-    if (!haveScalar)
-    {
-        if ( (!doubleSupport && (depth1 == CV_64F || _src2.depth() == CV_64F)) ||
-            !_src1.sameSize(_src2) || type1 != type2)
-            return false;
-    }
-    else
-    {
-        if (cn > 1 || depth1 <= CV_32S) // FIXIT: if (cn > 4): Need to clear CPU-based compare behavior
-            return false;
-    }
+    int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1),
+            type2 = _src2.type(), depth2 = CV_MAT_DEPTH(type2);

    if (!doubleSupport && depth1 == CV_64F)
        return false;

+    if (!haveScalar && (!_src1.sameSize(_src2) || type1 != type2))
+            return false;
+
    int kercn = haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
    // Workaround for bug with "?:" operator in AMD OpenCL compiler
-    bool workaroundForAMD = /*dev.isAMD() &&*/
-            (
-                (depth1 != CV_8U && depth1 != CV_8S)
-            );
-    if (workaroundForAMD)
+    if (depth1 >= CV_16U)
        kercn = 1;

    int scalarcn = kercn == 3 ? 4 : kercn;
-
    const char * const operationMap[] = { "==", ">", ">=", "<", "<=", "!=" };
    char cvt[40];

-    String buildOptions = format(
-            "-D %s -D srcT1=%s -D dstT=%s -D workT=srcT1 -D cn=%d"
-            " -D convertToDT=%s -D OP_CMP -D CMP_OPERATOR=%s -D srcT1_C1=%s"
-            " -D srcT2_C1=%s -D dstT_C1=%s -D workST=%s%s",
-            (haveScalar ? "UNARY_OP" : "BINARY_OP"),
-            ocl::typeToStr(CV_MAKE_TYPE(depth1, kercn)),
-            ocl::typeToStr(CV_8UC(kercn)), kercn,
-            ocl::convertTypeStr(depth1, CV_8U, kercn, cvt),
-            operationMap[op],
-            ocl::typeToStr(depth1), ocl::typeToStr(depth1), ocl::typeToStr(CV_8U),
-            ocl::typeToStr(CV_MAKE_TYPE(depth1, scalarcn)),
-            doubleSupport ? " -D DOUBLE_SUPPORT" : ""
-            );
-
-    ocl::Kernel k("KF", ocl::core::arithm_oclsrc, buildOptions);
+    String opts = format("-D %s -D srcT1=%s -D dstT=%s -D workT=srcT1 -D cn=%d"
+                         " -D convertToDT=%s -D OP_CMP -D CMP_OPERATOR=%s -D srcT1_C1=%s"
+                         " -D srcT2_C1=%s -D dstT_C1=%s -D workST=%s%s",
+                         haveScalar ? "UNARY_OP" : "BINARY_OP",
+                         ocl::typeToStr(CV_MAKE_TYPE(depth1, kercn)),
+                         ocl::typeToStr(CV_8UC(kercn)), kercn,
+                         ocl::convertTypeStr(depth1, CV_8U, kercn, cvt),
+                         operationMap[op], ocl::typeToStr(depth1),
+                         ocl::typeToStr(depth1), ocl::typeToStr(CV_8U),
+                         ocl::typeToStr(CV_MAKE_TYPE(depth1, scalarcn)),
+                         doubleSupport ? " -D DOUBLE_SUPPORT" : "");
+
+    ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
    if (k.empty())
        return false;

@ -2675,24 +2676,43 @@ static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, in

    if (haveScalar)
    {
-        size_t esz = CV_ELEM_SIZE1(type1)*scalarcn;
-        double buf[4]={0,0,0,0};
-        Mat src2sc = _src2.getMat();
+        size_t esz = CV_ELEM_SIZE1(type1) * scalarcn;
+        double buf[4] = { 0, 0, 0, 0 };
+        Mat src2 = _src2.getMat();
+
+        if( depth1 > CV_32S )
+            convertAndUnrollScalar( src2, depth1, (uchar *)buf, kercn );
+        else
+        {
+            double fval = 0;
+            getConvertFunc(depth2, CV_64F)(src2.data, 0, 0, 0, (uchar *)&fval, 0, Size(1, 1), 0);
+            if( fval < getMinVal(depth1) )
+                return dst.setTo(Scalar::all(op == CMP_GT || op == CMP_GE || op == CMP_NE ? 255 : 0)), true;
+
+            if( fval > getMaxVal(depth1) )
+                return dst.setTo(Scalar::all(op == CMP_LT || op == CMP_LE || op == CMP_NE ? 255 : 0)), true;

-        if (!src2sc.empty())
-            convertAndUnrollScalar(src2sc, type1, (uchar*)buf, 1);
+            int ival = cvRound(fval);
+            if( fval != ival )
+            {
+                if( op == CMP_LT || op == CMP_GE )
+                    ival = cvCeil(fval);
+                else if( op == CMP_LE || op == CMP_GT )
+                    ival = cvFloor(fval);
+                else
+                    return dst.setTo(Scalar::all(op == CMP_NE ? 255 : 0)), true;
+            }
+            convertAndUnrollScalar(Mat(1, 1, CV_32S, &ival), depth1, (uchar *)buf, kercn);
+        }

        ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, 0, buf, esz);

        k.args(ocl::KernelArg::ReadOnlyNoSize(src1, cn, kercn),
-               ocl::KernelArg::WriteOnly(dst, cn, kercn),
-               scalararg);
+               ocl::KernelArg::WriteOnly(dst, cn, kercn), scalararg);
    }
    else
    {
-        CV_DbgAssert(type1 == type2);
        UMat src2 = _src2.getUMat();
-        CV_DbgAssert(size == src2.size());

        k.args(ocl::KernelArg::ReadOnlyNoSize(src1),
               ocl::KernelArg::ReadOnlyNoSize(src2),
--- a/modules/core/src/dxt.cpp
+++ b/modules/core/src/dxt.cpp
@ -53,7 +53,7 @@ namespace cv
 # pragma warning(disable: 4748)
 #endif

-#if defined HAVE_IPP && IPP_VERSION_MAJOR*100 + IPP_VERSION_MINOR >= 701
+#if IPP_VERSION_X100 >= 701
 #define USE_IPP_DFT 1
 #else
 #undef USE_IPP_DFT
--- a/modules/core/src/mathfuncs.cpp
+++ b/modules/core/src/mathfuncs.cpp
@ -812,8 +812,6 @@ typedef union
 }
 DBLINT;

-#ifndef HAVE_IPP
-
 #define EXPTAB_SCALE 6
 #define EXPTAB_MASK  ((1 << EXPTAB_SCALE) - 1)

@ -1275,13 +1273,26 @@ static void Exp_64f( const double *_x, double *y, int n )
 #undef EXPTAB_MASK
 #undef EXPPOLY_32F_A0

-#else
+#ifdef HAVE_IPP
+static void Exp_32f_ipp(const float *x, float *y, int n)
+{
+    if (0 <= ippsExp_32f_A21(x, y, n))
+        return;
+    Exp_32f(x, y, n);
+}

-#define Exp_32f ippsExp_32f_A21
-#define Exp_64f ippsExp_64f_A50
+static void Exp_64f_ipp(const double *x, double *y, int n)
+{
+    if (0 <= ippsExp_64f_A50(x, y, n))
+        return;
+    Exp_64f(x, y, n);
+}

+#define Exp_32f Exp_32f_ipp
+#define Exp_64f Exp_64f_ipp
 #endif

+
 void exp( InputArray _src, OutputArray _dst )
 {
    int type = _src.type(), depth = _src.depth(), cn = _src.channels();
@ -1302,9 +1313,9 @@ void exp( InputArray _src, OutputArray _dst )
    for( size_t i = 0; i < it.nplanes; i++, ++it )
    {
        if( depth == CV_32F )
-            Exp_32f( (const float*)ptrs[0], (float*)ptrs[1], len );
+            Exp_32f((const float*)ptrs[0], (float*)ptrs[1], len);
        else
-            Exp_64f( (const double*)ptrs[0], (double*)ptrs[1], len );
+            Exp_64f((const double*)ptrs[0], (double*)ptrs[1], len);
    }
 }

@ -1313,8 +1324,6 @@ void exp( InputArray _src, OutputArray _dst )
 *                                          L O G                                         *
 \****************************************************************************************/

-#ifndef HAVE_IPP
-
 #define LOGTAB_SCALE    8
 #define LOGTAB_MASK         ((1 << LOGTAB_SCALE) - 1)
 #define LOGTAB_MASK2        ((1 << (20 - LOGTAB_SCALE)) - 1)
@ -1922,11 +1931,23 @@ static void Log_64f( const double *x, double *y, int n )
    }
 }

-#else
+#ifdef HAVE_IPP
+static void Log_32f_ipp(const float *x, float *y, int n)
+{
+    if (0 <= ippsLn_32f_A21(x, y, n))
+        return;
+    Log_32f(x, y, n);
+}

-#define Log_32f ippsLn_32f_A21
-#define Log_64f ippsLn_64f_A50
+static void Log_64f_ipp(const double *x, double *y, int n)
+{
+    if (0 <= ippsLn_64f_A50(x, y, n))
+        return;
+    Log_64f(x, y, n);
+}

+#define Log_32f Log_32f_ipp
+#define Log_64f Log_64f_ipp
 #endif

 void log( InputArray _src, OutputArray _dst )
--- a/modules/core/src/matmul.cpp
+++ b/modules/core/src/matmul.cpp
@ -44,10 +44,6 @@
 #include "opencl_kernels.hpp"
 #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"

-#ifdef HAVE_IPP
-#include "ippversion.h"
-#endif
-
 namespace cv
 {

@ -2803,11 +2799,11 @@ static double dotProd_8u(const uchar* src1, const uchar* src2, int len)
 {
    double r = 0;
 #if ARITHM_USE_IPP
-    ippiDotProd_8u64f_C1R(src1, (int)(len*sizeof(src1[0])),
-                          src2, (int)(len*sizeof(src2[0])),
-                          ippiSize(len, 1), &r);
-    return r;
-#else
+    if (0 <= ippiDotProd_8u64f_C1R(src1, (int)(len*sizeof(src1[0])),
+                                   src2, (int)(len*sizeof(src2[0])),
+                                   ippiSize(len, 1), &r))
+        return r;
+#endif
    int i = 0;

 #if CV_SSE2
@ -2853,7 +2849,6 @@ static double dotProd_8u(const uchar* src1, const uchar* src2, int len)
    }
 #endif
    return r + dotProd_(src1, src2, len - i);
-#endif
 }


@ -2864,48 +2859,52 @@ static double dotProd_8s(const schar* src1, const schar* src2, int len)

 static double dotProd_16u(const ushort* src1, const ushort* src2, int len)
 {
+#if (ARITHM_USE_IPP == 1)
    double r = 0;
-    IF_IPP(ippiDotProd_16u64f_C1R(src1, (int)(len*sizeof(src1[0])),
-                                  src2, (int)(len*sizeof(src2[0])),
-                                  ippiSize(len, 1), &r),
-           r = dotProd_(src1, src2, len));
-    return r;
+    if (0 <= ippiDotProd_16u64f_C1R(src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r))
+        return r;
+#endif
+    return dotProd_(src1, src2, len);
 }

 static double dotProd_16s(const short* src1, const short* src2, int len)
 {
+#if (ARITHM_USE_IPP == 1)
    double r = 0;
-    IF_IPP(ippiDotProd_16s64f_C1R(src1, (int)(len*sizeof(src1[0])),
-                                  src2, (int)(len*sizeof(src2[0])),
-                                  ippiSize(len, 1), &r),
-           r = dotProd_(src1, src2, len));
-    return r;
+    if (0 <= ippiDotProd_16s64f_C1R(src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r))
+        return r;
+#endif
+    return dotProd_(src1, src2, len);
 }

 static double dotProd_32s(const int* src1, const int* src2, int len)
 {
+#if (ARITHM_USE_IPP == 1)
    double r = 0;
-    IF_IPP(ippiDotProd_32s64f_C1R(src1, (int)(len*sizeof(src1[0])),
-                                  src2, (int)(len*sizeof(src2[0])),
-                                  ippiSize(len, 1), &r),
-           r = dotProd_(src1, src2, len));
-    return r;
+    if (0 <= ippiDotProd_32s64f_C1R(src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r))
+        return r;
+#endif
+    return dotProd_(src1, src2, len);
 }

 static double dotProd_32f(const float* src1, const float* src2, int len)
 {
+#if (ARITHM_USE_IPP == 1)
    double r = 0;
-    IF_IPP(ippsDotProd_32f64f(src1, src2, len, &r),
-           r = dotProd_(src1, src2, len));
-    return r;
+    if (0 <= ippsDotProd_32f64f(src1, src2, len, &r))
+        return r;
+#endif
+    return dotProd_(src1, src2, len);
 }

 static double dotProd_64f(const double* src1, const double* src2, int len)
 {
+#if (ARITHM_USE_IPP == 1)
    double r = 0;
-    IF_IPP(ippsDotProd_64f(src1, src2, len, &r),
-           r = dotProd_(src1, src2, len));
-    return r;
+    if (0 <= ippsDotProd_64f(src1, src2, len, &r))
+        return r;
+#endif
+    return dotProd_(src1, src2, len);
 }


--- a/modules/core/src/out.cpp
+++ b/modules/core/src/out.cpp
@ -257,7 +257,7 @@ namespace
        {
            char braces[5] = {'\0', '\0', ';', '\0', '\0'};
            return cv::makePtr<FormattedImpl>("[", "]", mtx, &*braces,
-                mtx.cols == 1 || !multiline, mtx.depth() == CV_64F ? prec64f : prec32f );
+                mtx.rows == 1 || !multiline, mtx.depth() == CV_64F ? prec64f : prec32f );
        }
    };

@ -271,7 +271,7 @@ namespace
            if (mtx.cols == 1)
                braces[0] = braces[1] = '\0';
            return cv::makePtr<FormattedImpl>("[", "]", mtx, &*braces,
-                mtx.cols*mtx.channels() == 1 || !multiline, mtx.depth() == CV_64F ? prec64f : prec32f );
+                mtx.rows*mtx.channels() == 1 || !multiline, mtx.depth() == CV_64F ? prec64f : prec32f );
        }
    };

@ -290,7 +290,7 @@ namespace
                braces[0] = braces[1] = '\0';
            return cv::makePtr<FormattedImpl>("array([",
                cv::format("], type='%s')", numpyTypes[mtx.depth()]), mtx, &*braces,
-                mtx.cols*mtx.channels() == 1 || !multiline, mtx.depth() == CV_64F ? prec64f : prec32f );
+                mtx.rows*mtx.channels() == 1 || !multiline, mtx.depth() == CV_64F ? prec64f : prec32f );
        }
    };

@ -303,7 +303,7 @@ namespace
            char braces[5] = {'\0', '\0', '\0', '\0', '\0'};
            return cv::makePtr<FormattedImpl>(cv::String(),
                mtx.rows > 1 ? cv::String("\n") : cv::String(), mtx, &*braces,
-                mtx.cols*mtx.channels() == 1 || !multiline, mtx.depth() == CV_64F ? prec64f : prec32f );
+                mtx.rows*mtx.channels() == 1 || !multiline, mtx.depth() == CV_64F ? prec64f : prec32f );
        }
    };

@ -315,7 +315,7 @@ namespace
        {
            char braces[5] = {'\0', '\0', ',', '\0', '\0'};
            return cv::makePtr<FormattedImpl>("{", "}", mtx, &*braces,
-                mtx.cols == 1 || !multiline, mtx.depth() == CV_64F ? prec64f : prec32f );
+                mtx.rows == 1 || !multiline, mtx.depth() == CV_64F ? prec64f : prec32f );
        }
    };

--- a/modules/core/src/persistence.cpp
+++ b/modules/core/src/persistence.cpp
@ -5486,11 +5486,27 @@ internal::WriteStructContext::WriteStructContext(FileStorage& _fs,
 {
    cvStartWriteStruct(**fs, !name.empty() ? name.c_str() : 0, flags,
                       !typeName.empty() ? typeName.c_str() : 0);
+    fs->elname = String();
+    if ((flags & FileNode::TYPE_MASK) == FileNode::SEQ)
+    {
+        fs->state = FileStorage::VALUE_EXPECTED;
+        fs->structs.push_back('[');
+    }
+    else
+    {
+        fs->state = FileStorage::NAME_EXPECTED + FileStorage::INSIDE_MAP;
+        fs->structs.push_back('{');
+    }
 }

 internal::WriteStructContext::~WriteStructContext()
 {
    cvEndWriteStruct(**fs);
+    fs->structs.pop_back();
+    fs->state = fs->structs.empty() || fs->structs.back() == '{' ?
+        FileStorage::NAME_EXPECTED + FileStorage::INSIDE_MAP :
+        FileStorage::VALUE_EXPECTED;
+    fs->elname = String();
 }


--- a/modules/core/src/precomp.hpp
+++ b/modules/core/src/precomp.hpp
@ -199,10 +199,8 @@ enum { BLOCK_SIZE = 1024 };

 #if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7)
 #define ARITHM_USE_IPP 1
-#define IF_IPP(then_call, else_call) then_call
 #else
 #define ARITHM_USE_IPP 0
-#define IF_IPP(then_call, else_call) else_call
 #endif

 inline bool checkScalar(const Mat& sc, int atype, int sckind, int akind)
--- a/modules/core/src/stat.cpp
+++ b/modules/core/src/stat.cpp
@ -972,7 +972,9 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input
            ippiMeanStdDevFuncC1 ippFuncC1 =
            type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R :
            type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R :
-            //type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0
+#if (IPP_VERSION_X100 >= 801)
+            type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0
+#endif
            0;
            if( ippFuncC1 )
            {
@ -2111,8 +2113,10 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
                type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C3R :
                type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C4R :
                type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C1R :
-                //type == CV_16SC3 ? (ippiNormFunc)ippiNorm_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
-                //type == CV_16SC4 ? (ippiNormFunc)ippiNorm_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
+#if (IPP_VERSION_X100 >= 801)
+                type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
+                type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
+#endif
                type == CV_32FC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C1R :
                type == CV_32FC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C3R :
                type == CV_32FC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C4R :
@ -2360,7 +2364,7 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
        Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();

-        normType &= 7;
+        normType &= NORM_TYPE_MASK;
        CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR ||
                ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
        size_t total_size = src1.total();
@ -2541,8 +2545,10 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
                type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C3R :
                type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C4R :
                type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C1R :
-                //type == CV_16SC3 ? (ippiNormDiffFunc)ippiNormDiff_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
-                //type == CV_16SC4 ? (ippiNormDiffFunc)ippiNormDiff_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
+#if (IPP_VERSION_X100 >= 801)
+                type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
+                type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
+#endif
                type == CV_32FC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C1R :
                type == CV_32FC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C3R :
                type == CV_32FC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C4R :
--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@ -274,7 +274,14 @@ volatile bool useOptimizedFlag = true;
 #ifdef HAVE_IPP
 struct IPPInitializer
 {
-    IPPInitializer(void) { ippStaticInit(); }
+    IPPInitializer(void)
+    {
+#if IPP_VERSION_MAJOR >= 8
+        ippInit();
+#else
+        ippStaticInit();
+#endif
+    }
 };

 IPPInitializer ippInitializer;
@ -390,17 +397,17 @@ int64 getCPUTickCount(void)

 #else

-#ifdef HAVE_IPP
-int64 getCPUTickCount(void)
-{
-    return ippGetCpuClocks();
-}
-#else
+//#ifdef HAVE_IPP
+//int64 getCPUTickCount(void)
+//{
+//    return ippGetCpuClocks();
+//}
+//#else
 int64 getCPUTickCount(void)
 {
    return getTickCount();
 }
-#endif
+//#endif

 #endif

--- a/modules/core/src/umatrix.cpp
+++ b/modules/core/src/umatrix.cpp
@ -88,8 +88,10 @@ void UMatData::unlock()

 MatAllocator* UMat::getStdAllocator()
 {
+#ifdef HAVE_OPENCL
    if( ocl::haveOpenCL() && ocl::useOpenCL() )
        return ocl::getOpenCLAllocator();
+#endif
    return Mat::getStdAllocator();
 }

@ -665,7 +667,7 @@ void UMat::copyTo(OutputArray _dst, InputArray _mask) const
        copyTo(_dst);
        return;
    }
-
+#ifdef HAVE_OPENCL
    int cn = channels(), mtype = _mask.type(), mdepth = CV_MAT_DEPTH(mtype), mcn = CV_MAT_CN(mtype);
    CV_Assert( mdepth == CV_8U && (mcn == 1 || mcn == cn) );

@ -692,7 +694,7 @@ void UMat::copyTo(OutputArray _dst, InputArray _mask) const
                return;
        }
    }
-
+#endif
    Mat src = getMat(ACCESS_READ);
    src.copyTo(_dst, _mask);
 }
@ -713,7 +715,7 @@ void UMat::convertTo(OutputArray _dst, int _type, double alpha, double beta) con
        copyTo(_dst);
        return;
    }
-
+#ifdef HAVE_OPENCL
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    bool needDouble = sdepth == CV_64F || ddepth == CV_64F;
    if( dims <= 2 && cn && _dst.isUMat() && ocl::useOpenCL() &&
@ -748,7 +750,7 @@ void UMat::convertTo(OutputArray _dst, int _type, double alpha, double beta) con
                return;
        }
    }
-
+#endif
    Mat m = getMat(ACCESS_READ);
    m.convertTo(_dst, _type, alpha, beta);
 }
@ -756,7 +758,9 @@ void UMat::convertTo(OutputArray _dst, int _type, double alpha, double beta) con
 UMat& UMat::setTo(InputArray _value, InputArray _mask)
 {
    bool haveMask = !_mask.empty();
+#ifdef HAVE_OPENCL
    int tp = type(), cn = CV_MAT_CN(tp);
+
    if( dims <= 2 && cn <= 4 && CV_MAT_DEPTH(tp) < CV_64F && ocl::useOpenCL() )
    {
        Mat value = _value.getMat();
@ -795,6 +799,7 @@ UMat& UMat::setTo(InputArray _value, InputArray _mask)
                return *this;
        }
    }
+#endif
    Mat m = getMat(haveMask ? ACCESS_RW : ACCESS_WRITE);
    m.setTo(_value, _mask);
    return *this;
--- a/modules/core/test/test_arithm.cpp
+++ b/modules/core/test/test_arithm.cpp
@ -1362,7 +1362,8 @@ TEST_P(ElemWiseTest, accuracy)

        double maxErr = op->getMaxErr(depth);
        vector<int> pos;
-        ASSERT_PRED_FORMAT2(cvtest::MatComparator(maxErr, op->context), dst0, dst) << "\nsrc[0] ~ " << cvtest::MatInfo(!src.empty() ? src[0] : Mat()) << "\ntestCase #" << testIdx << "\n";
+        ASSERT_PRED_FORMAT2(cvtest::MatComparator(maxErr, op->context), dst0, dst) << "\nsrc[0] ~ " <<
+            cvtest::MatInfo(!src.empty() ? src[0] : Mat()) << "\ntestCase #" << testIdx << "\n";
    }
 }

@ -1500,7 +1501,7 @@ protected:
                }
                Mat d1;
                d.convertTo(d1, depth);
-                CV_Assert( norm(c, d1, CV_C) <= DBL_EPSILON );
+                CV_Assert( cvtest::norm(c, d1, CV_C) <= DBL_EPSILON );
            }

            Mat_<uchar> tmpSrc(100,100);
@ -1574,7 +1575,7 @@ TEST_P(Mul1, One)

    cv::multiply(3, src, dst);

-    ASSERT_EQ(0, cv::norm(dst, ref_dst, cv::NORM_INF));
+    ASSERT_EQ(0, cvtest::norm(dst, ref_dst, cv::NORM_INF));
 }

 INSTANTIATE_TEST_CASE_P(Arithm, Mul1, testing::Values(Size(2, 2), Size(1, 1)));
--- a/modules/core/test/test_dxt.cpp
+++ b/modules/core/test/test_dxt.cpp
@ -855,7 +855,7 @@ protected:
            merge(mv, 2, srcz);
            dft(srcz, dstz);
            dft(src, dst, DFT_COMPLEX_OUTPUT);
-            if(norm(dst, dstz, NORM_INF) > 1e-3)
+            if (cvtest::norm(dst, dstz, NORM_INF) > 1e-3)
            {
                cout << "actual:\n" << dst << endl << endl;
                cout << "reference:\n" << dstz << endl << endl;
--- a/modules/core/test/test_eigen.cpp
+++ b/modules/core/test/test_eigen.cpp
@ -175,7 +175,7 @@ bool Core_EigenTest::check_pair_count(const cv::Mat& src, const cv::Mat& evalues
    {
        std::cout << endl; std::cout << "Checking sizes of eigen values matrix " << evalues << "..." << endl;
        std::cout << "Number of rows: " << evalues.rows << "   Number of cols: " << evalues.cols << endl;
-        std:: cout << "Size of src symmetric matrix: " << src.rows << " * " << src.cols << endl; std::cout << endl;
+        std::cout << "Size of src symmetric matrix: " << src.rows << " * " << src.cols << endl; std::cout << endl;
        CV_Error(CORE_EIGEN_ERROR_COUNT, MESSAGE_ERROR_COUNT);
        return false;
    }
@ -187,7 +187,7 @@ bool Core_EigenTest::check_pair_count(const cv::Mat& src, const cv::Mat& evalues
    int n = src.rows, s = sign(high_index);
    int right_eigen_pair_count = n - max<int>(0, low_index) - ((int)((n/2.0)*(s*s-s)) + (1+s-s*s)*(n - (high_index+1)));

-    if (!((evectors.rows == right_eigen_pair_count) && (evectors.cols == right_eigen_pair_count)))
+    if (!(evectors.rows == right_eigen_pair_count && evectors.cols == right_eigen_pair_count))
    {
        std::cout << endl; std::cout << "Checking sizes of eigen vectors matrix " << evectors << "..." << endl;
        std::cout << "Number of rows: " << evectors.rows << "   Number of cols: " << evectors.cols << endl;
@ -196,7 +196,7 @@ bool Core_EigenTest::check_pair_count(const cv::Mat& src, const cv::Mat& evalues
        return false;
    }

-    if (!((evalues.rows == right_eigen_pair_count) && (evalues.cols == 1)))
+    if (!(evalues.rows == right_eigen_pair_count && evalues.cols == 1))
    {
        std::cout << endl; std::cout << "Checking sizes of eigen values matrix " << evalues << "..." << endl;
        std::cout << "Number of rows: " << evalues.rows << "   Number of cols: " << evalues.cols << endl;
@ -212,9 +212,9 @@ void Core_EigenTest::print_information(const size_t norm_idx, const cv::Mat& src
 {
    switch (NORM_TYPE[norm_idx])
    {
-    case cv::NORM_L1: {std::cout << "L1"; break;}
-    case cv::NORM_L2: {std::cout << "L2"; break;}
-    case cv::NORM_INF: {std::cout << "INF"; break;}
+    case cv::NORM_L1: std::cout << "L1"; break;
+    case cv::NORM_L2: std::cout << "L2"; break;
+    case cv::NORM_INF: std::cout << "INF"; break;
    default: break;
    }

@ -234,7 +234,7 @@ bool Core_EigenTest::check_orthogonality(const cv::Mat& U)

    for (int i = 0; i < COUNT_NORM_TYPES; ++i)
    {
-        double diff = cv::norm(UUt, E, NORM_TYPE[i]);
+        double diff = cvtest::norm(UUt, E, NORM_TYPE[i]);
        if (diff > eps_vec)
        {
            std::cout << endl; std::cout << "Checking orthogonality of matrix " << U << ": ";
@ -271,12 +271,12 @@ bool Core_EigenTest::check_pairs_order(const cv::Mat& eigen_values)
            for (int i = 0; i < (int)(eigen_values.total() - 1); ++i)
                if (!(eigen_values.at<double>(i, 0) > eigen_values.at<double>(i+1, 0)))
                {
-                std::cout << endl; std::cout << "Checking order of eigen values vector " << eigen_values << "..." << endl;
-                std::cout << "Pair of indexes with non ascending of eigen values: (" << i << ", " << i+1 << ")." << endl;
-                std::cout << endl;
-                CV_Error(CORE_EIGEN_ERROR_ORDER, "Eigen values are not sorted in ascending order.");
-                return false;
-            }
+                    std::cout << endl; std::cout << "Checking order of eigen values vector " << eigen_values << "..." << endl;
+                    std::cout << "Pair of indexes with non ascending of eigen values: (" << i << ", " << i+1 << ")." << endl;
+                    std::cout << endl;
+                    CV_Error(CORE_EIGEN_ERROR_ORDER, "Eigen values are not sorted in ascending order.");
+                    return false;
+                }

            break;
        }
@ -296,11 +296,14 @@ bool Core_EigenTest::test_pairs(const cv::Mat& src)

    cv::eigen(src, eigen_values, eigen_vectors);

-    if (!check_pair_count(src, eigen_values, eigen_vectors)) return false;
+    if (!check_pair_count(src, eigen_values, eigen_vectors))
+        return false;

-    if (!check_orthogonality (eigen_vectors)) return false;
+    if (!check_orthogonality (eigen_vectors))
+        return false;

-    if (!check_pairs_order(eigen_values)) return false;
+    if (!check_pairs_order(eigen_values))
+        return false;

    cv::Mat eigen_vectors_t; cv::transpose(eigen_vectors, eigen_vectors_t);

@ -340,7 +343,7 @@ bool Core_EigenTest::test_pairs(const cv::Mat& src)

    for (int i = 0; i < COUNT_NORM_TYPES; ++i)
    {
-        double diff = cv::norm(disparity, NORM_TYPE[i]);
+        double diff = cvtest::norm(disparity, NORM_TYPE[i]);
        if (diff > eps_vec)
        {
            std::cout << endl; std::cout << "Checking accuracy of eigen vectors computing for matrix " << src << ": ";
@ -369,7 +372,7 @@ bool Core_EigenTest::test_values(const cv::Mat& src)

    for (int i = 0; i < COUNT_NORM_TYPES; ++i)
    {
-        double diff = cv::norm(eigen_values_1, eigen_values_2, NORM_TYPE[i]);
+        double diff = cvtest::norm(eigen_values_1, eigen_values_2, NORM_TYPE[i]);
        if (diff > eps_val)
        {
            std::cout << endl; std::cout << "Checking accuracy of eigen values computing for matrix " << src << ": ";
--- a/modules/core/test/test_io.cpp
+++ b/modules/core/test/test_io.cpp
@ -380,6 +380,40 @@ TEST(Core_InputOutput, write_read_consistency) { Core_IOTest test; test.safe_run

 extern void testFormatter();

+
+struct UserDefinedType
+{
+    int a;
+    float b;
+};
+
+static inline bool operator==(const UserDefinedType &x,
+                              const UserDefinedType &y) {
+    return (x.a == y.a) && (x.b == y.b);
+}
+
+static inline void write(FileStorage &fs,
+                         const String&,
+                         const UserDefinedType &value)
+{
+    fs << "{:" << "a" << value.a << "b" << value.b << "}";
+}
+
+static inline void read(const FileNode& node,
+                        UserDefinedType& value,
+                        const UserDefinedType& default_value
+                          = UserDefinedType()) {
+    if(node.empty())
+    {
+        value = default_value;
+    }
+    else
+    {
+        node["a"] >> value.a;
+        node["b"] >> value.b;
+    }
+}
+
 class CV_MiscIOTest : public cvtest::BaseTest
 {
 public:
@ -393,11 +427,14 @@ protected:
            string fname = cv::tempfile(".xml");
            vector<int> mi, mi2, mi3, mi4;
            vector<Mat> mv, mv2, mv3, mv4;
+            vector<UserDefinedType> vudt, vudt2, vudt3, vudt4;
            Mat m(10, 9, CV_32F);
            Mat empty;
+            UserDefinedType udt = { 8, 3.3f };
            randu(m, 0, 1);
            mi3.push_back(5);
            mv3.push_back(m);
+            vudt3.push_back(udt);
            Point_<float> p1(1.1f, 2.2f), op1;
            Point3i p2(3, 4, 5), op2;
            Size s1(6, 7), os1;
@ -412,6 +449,8 @@ protected:
            fs << "mv" << mv;
            fs << "mi3" << mi3;
            fs << "mv3" << mv3;
+            fs << "vudt" << vudt;
+            fs << "vudt3" << vudt3;
            fs << "empty" << empty;
            fs << "p1" << p1;
            fs << "p2" << p2;
@ -428,6 +467,8 @@ protected:
            fs["mv"] >> mv2;
            fs["mi3"] >> mi4;
            fs["mv3"] >> mv4;
+            fs["vudt"] >> vudt2;
+            fs["vudt3"] >> vudt4;
            fs["empty"] >> empty;
            fs["p1"] >> op1;
            fs["p2"] >> op2;
@ -439,9 +480,11 @@ protected:
            fs["g1"] >> og1;
            CV_Assert( mi2.empty() );
            CV_Assert( mv2.empty() );
-            CV_Assert( norm(mi3, mi4, CV_C) == 0 );
+            CV_Assert( cvtest::norm(Mat(mi3), Mat(mi4), CV_C) == 0 );
            CV_Assert( mv4.size() == 1 );
-            double n = norm(mv3[0], mv4[0], CV_C);
+            double n = cvtest::norm(mv3[0], mv4[0], CV_C);
+            CV_Assert( vudt2.empty() );
+            CV_Assert( vudt3 == vudt4 );
            CV_Assert( n == 0 );
            CV_Assert( op1 == p1 );
            CV_Assert( op2 == p2 );
--- a/modules/core/test/test_ippasync.cpp
+++ b/modules/core/test/test_ippasync.cpp
@ -0,0 +1,179 @@
+#include "test_precomp.hpp"
+#include "opencv2/ts/ocl_test.hpp"
+
+#ifdef HAVE_IPP_A
+#include "opencv2/core/ippasync.hpp"
+
+using namespace cv;
+using namespace std;
+using namespace cvtest;
+
+namespace cvtest {
+namespace ocl {
+
+PARAM_TEST_CASE(IPPAsync, MatDepth, Channels, hppAccelType)
+{
+    int type;
+    int cn;
+    int depth;
+    hppAccelType accelType;
+
+    Mat matrix, result;
+    hppiMatrix * hppMat;
+    hppAccel accel;
+    hppiVirtualMatrix * virtMatrix;
+    hppStatus sts;
+
+    virtual void SetUp()
+    {
+        type = CV_MAKE_TYPE(GET_PARAM(0), GET_PARAM(1));
+        depth = GET_PARAM(0);
+        cn = GET_PARAM(1);
+        accelType = GET_PARAM(2);
+    }
+
+    virtual void generateTestData()
+    {
+        Size matrix_Size = randomSize(2, 100);
+        const double upValue = 100;
+
+        matrix = randomMat(matrix_Size, type, -upValue, upValue);
+    }
+
+    void Near(double threshold = 0.0)
+    {
+        EXPECT_MAT_NEAR(matrix, result, threshold);
+    }
+};
+
+TEST_P(IPPAsync, accuracy)
+{
+    sts = hppCreateInstance(accelType, 0, &accel);
+    if (sts!=HPP_STATUS_NO_ERROR) printf("hppStatus = %d\n",sts);
+    CV_Assert(sts==HPP_STATUS_NO_ERROR);
+
+    virtMatrix = hppiCreateVirtualMatrices(accel, 2);
+
+    for (int j = 0; j < test_loop_times; j++)
+    {
+        generateTestData();
+        hppMat = hpp::getHpp(matrix,accel);
+
+        hppScalar a = 3;
+
+        sts = hppiAddC(accel, hppMat, a, 0, virtMatrix[0]);
+        CV_Assert(sts==HPP_STATUS_NO_ERROR);
+        sts = hppiSubC(accel, virtMatrix[0], a, 0, virtMatrix[1]);
+        CV_Assert(sts==HPP_STATUS_NO_ERROR);
+
+        sts = hppWait(accel, HPP_TIME_OUT_INFINITE);
+        CV_Assert(sts==HPP_STATUS_NO_ERROR);
+
+        result = hpp::getMat(virtMatrix[1], accel, cn);
+
+        Near(5.0e-6);
+
+        sts =  hppiFreeMatrix(hppMat);
+        CV_Assert(sts==HPP_STATUS_NO_ERROR);
+    }
+
+    sts = hppiDeleteVirtualMatrices(accel, virtMatrix);
+    CV_Assert(sts==HPP_STATUS_NO_ERROR);
+    sts = hppDeleteInstance(accel);
+    CV_Assert(sts==HPP_STATUS_NO_ERROR);
+}
+
+PARAM_TEST_CASE(IPPAsyncShared, Channels, hppAccelType)
+{
+    int cn;
+    int type;
+    hppAccelType accelType;
+
+    Mat matrix, result;
+    hppiMatrix* hppMat;
+    hppAccel accel;
+    hppiVirtualMatrix * virtMatrix;
+    hppStatus sts;
+
+    virtual void SetUp()
+    {
+        cn = GET_PARAM(0);
+        accelType = GET_PARAM(1);
+        type=CV_MAKE_TYPE(CV_8U, GET_PARAM(0));
+    }
+
+    virtual void generateTestData()
+    {
+        Size matrix_Size = randomSize(2, 100);
+        hpp32u pitch, size;
+        const int upValue = 100;
+
+        sts = hppQueryMatrixAllocParams(accel, (hpp32u)(matrix_Size.width*cn), (hpp32u)matrix_Size.height, HPP_DATA_TYPE_8U, &pitch, &size);
+
+        if (pitch!=0 && size!=0)
+        {
+            uchar *pData = (uchar*)_aligned_malloc(size, 4096);
+
+            for (int j=0; j<matrix_Size.height; j++)
+                for(int i=0; i<matrix_Size.width*cn; i++)
+                    pData[i+j*pitch] = rand()%upValue;
+
+            matrix = Mat(matrix_Size.height, matrix_Size.width, type, pData, pitch);
+        }
+
+        matrix = randomMat(matrix_Size, type, 0, upValue);
+    }
+
+    void Near(double threshold = 0.0)
+    {
+        EXPECT_MAT_NEAR(matrix, result, threshold);
+    }
+};
+
+TEST_P(IPPAsyncShared, accuracy)
+{
+    sts = hppCreateInstance(accelType, 0, &accel);
+    if (sts!=HPP_STATUS_NO_ERROR) printf("hppStatus = %d\n",sts);
+    CV_Assert(sts==HPP_STATUS_NO_ERROR);
+
+    virtMatrix = hppiCreateVirtualMatrices(accel, 2);
+
+    for (int j = 0; j < test_loop_times; j++)
+    {
+        generateTestData();
+        hppMat = hpp::getHpp(matrix,accel);
+
+        hppScalar a = 3;
+
+        sts = hppiAddC(accel, hppMat, a, 0, virtMatrix[0]);
+        CV_Assert(sts==HPP_STATUS_NO_ERROR);
+        sts = hppiSubC(accel, virtMatrix[0], a, 0, virtMatrix[1]);
+        CV_Assert(sts==HPP_STATUS_NO_ERROR);
+
+        sts = hppWait(accel, HPP_TIME_OUT_INFINITE);
+        CV_Assert(sts==HPP_STATUS_NO_ERROR);
+
+        result = hpp::getMat(virtMatrix[1], accel, cn);
+
+        Near(0);
+
+        sts =  hppiFreeMatrix(hppMat);
+        CV_Assert(sts==HPP_STATUS_NO_ERROR);
+    }
+
+    sts = hppiDeleteVirtualMatrices(accel, virtMatrix);
+    CV_Assert(sts==HPP_STATUS_NO_ERROR);
+    sts = hppDeleteInstance(accel);
+    CV_Assert(sts==HPP_STATUS_NO_ERROR);
+}
+
+INSTANTIATE_TEST_CASE_P(IppATest, IPPAsyncShared, Combine(Values(1, 2, 3, 4),
+                                                    Values( HPP_ACCEL_TYPE_CPU, HPP_ACCEL_TYPE_GPU)));
+
+INSTANTIATE_TEST_CASE_P(IppATest, IPPAsync, Combine(Values(CV_8U, CV_16U, CV_16S, CV_32F),
+                                                   Values(1, 2, 3, 4),
+                                                   Values( HPP_ACCEL_TYPE_CPU, HPP_ACCEL_TYPE_GPU)));
+
+}
+}
+#endif
--- a/modules/core/test/test_mat.cpp
+++ b/modules/core/test/test_mat.cpp
@ -340,7 +340,7 @@ protected:
            Mat Qv = Q * v;

            Mat lv = eval.at<float>(i,0) * v;
-            err = norm( Qv, lv );
+            err = cvtest::norm( Qv, lv, NORM_L2 );
            if( err > eigenEps )
            {
                ts->printf( cvtest::TS::LOG, "bad accuracy of eigen(); err = %f\n", err );
@ -350,7 +350,7 @@ protected:
        }
        // check pca eigenvalues
        evalEps = 1e-6, evecEps = 1e-3;
-        err = norm( rPCA.eigenvalues, subEval );
+        err = cvtest::norm( rPCA.eigenvalues, subEval, NORM_L2 );
        if( err > evalEps )
        {
            ts->printf( cvtest::TS::LOG, "pca.eigenvalues is incorrect (CV_PCA_DATA_AS_ROW); err = %f\n", err );
@ -362,11 +362,11 @@ protected:
        {
            Mat r0 = rPCA.eigenvectors.row(i);
            Mat r1 = subEvec.row(i);
-            err = norm( r0, r1, CV_L2 );
+            err = cvtest::norm( r0, r1, CV_L2 );
            if( err > evecEps )
            {
                r1 *= -1;
-                double err2 = norm(r0, r1, CV_L2);
+                double err2 = cvtest::norm(r0, r1, CV_L2);
                if( err2 > evecEps )
                {
                    Mat tmp;
@ -390,7 +390,7 @@ protected:
            // check pca project
            Mat subEvec_t = subEvec.t();
            Mat prj = rTestPoints.row(i) - avg; prj *= subEvec_t;
-            err = norm(rPrjTestPoints.row(i), prj, CV_RELATIVE_L2);
+            err = cvtest::norm(rPrjTestPoints.row(i), prj, CV_RELATIVE_L2);
            if( err > prjEps )
            {
                ts->printf( cvtest::TS::LOG, "bad accuracy of project() (CV_PCA_DATA_AS_ROW); err = %f\n", err );
@ -399,7 +399,7 @@ protected:
            }
            // check pca backProject
            Mat backPrj = rPrjTestPoints.row(i) * subEvec + avg;
-            err = norm( rBackPrjTestPoints.row(i), backPrj, CV_RELATIVE_L2 );
+            err = cvtest::norm( rBackPrjTestPoints.row(i), backPrj, CV_RELATIVE_L2 );
            if( err > backPrjEps )
            {
                ts->printf( cvtest::TS::LOG, "bad accuracy of backProject() (CV_PCA_DATA_AS_ROW); err = %f\n", err );
@ -412,14 +412,14 @@ protected:
        cPCA( rPoints.t(), Mat(), CV_PCA_DATA_AS_COL, maxComponents );
        diffPrjEps = 1, diffBackPrjEps = 1;
        Mat ocvPrjTestPoints = cPCA.project(rTestPoints.t());
-        err = norm(cv::abs(ocvPrjTestPoints), cv::abs(rPrjTestPoints.t()), CV_RELATIVE_L2 );
+        err = cvtest::norm(cv::abs(ocvPrjTestPoints), cv::abs(rPrjTestPoints.t()), CV_RELATIVE_L2 );
        if( err > diffPrjEps )
        {
            ts->printf( cvtest::TS::LOG, "bad accuracy of project() (CV_PCA_DATA_AS_COL); err = %f\n", err );
            ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
            return;
        }
-        err = norm(cPCA.backProject(ocvPrjTestPoints), rBackPrjTestPoints.t(), CV_RELATIVE_L2 );
+        err = cvtest::norm(cPCA.backProject(ocvPrjTestPoints), rBackPrjTestPoints.t(), CV_RELATIVE_L2 );
        if( err > diffBackPrjEps )
        {
            ts->printf( cvtest::TS::LOG, "bad accuracy of backProject() (CV_PCA_DATA_AS_COL); err = %f\n", err );
@ -433,9 +433,9 @@ protected:
        Mat rvPrjTestPoints = cPCA.project(rTestPoints.t());

        if( cPCA.eigenvectors.rows > maxComponents)
-            err = norm(cv::abs(rvPrjTestPoints.rowRange(0,maxComponents)), cv::abs(rPrjTestPoints.t()), CV_RELATIVE_L2 );
+            err = cvtest::norm(cv::abs(rvPrjTestPoints.rowRange(0,maxComponents)), cv::abs(rPrjTestPoints.t()), CV_RELATIVE_L2 );
        else
-            err = norm(cv::abs(rvPrjTestPoints), cv::abs(rPrjTestPoints.colRange(0,cPCA.eigenvectors.rows).t()), CV_RELATIVE_L2 );
+            err = cvtest::norm(cv::abs(rvPrjTestPoints), cv::abs(rPrjTestPoints.colRange(0,cPCA.eigenvectors.rows).t()), CV_RELATIVE_L2 );

        if( err > diffPrjEps )
        {
@ -443,7 +443,7 @@ protected:
            ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
            return;
        }
-        err = norm(cPCA.backProject(rvPrjTestPoints), rBackPrjTestPoints.t(), CV_RELATIVE_L2 );
+        err = cvtest::norm(cPCA.backProject(rvPrjTestPoints), rBackPrjTestPoints.t(), CV_RELATIVE_L2 );
        if( err > diffBackPrjEps )
        {
            ts->printf( cvtest::TS::LOG, "bad accuracy of backProject() (CV_PCA_DATA_AS_COL); retainedVariance=0.95; err = %f\n", err );
@ -467,14 +467,14 @@ protected:
        cvProjectPCA( &_testPoints, &_avg, &_evec, &_prjTestPoints );
        cvBackProjectPCA( &_prjTestPoints, &_avg, &_evec, &_backPrjTestPoints );

-        err = norm(prjTestPoints, rPrjTestPoints, CV_RELATIVE_L2);
+        err = cvtest::norm(prjTestPoints, rPrjTestPoints, CV_RELATIVE_L2);
        if( err > diffPrjEps )
        {
            ts->printf( cvtest::TS::LOG, "bad accuracy of cvProjectPCA() (CV_PCA_DATA_AS_ROW); err = %f\n", err );
            ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
            return;
        }
-        err = norm(backPrjTestPoints, rBackPrjTestPoints, CV_RELATIVE_L2);
+        err = cvtest::norm(backPrjTestPoints, rBackPrjTestPoints, CV_RELATIVE_L2);
        if( err > diffBackPrjEps )
        {
            ts->printf( cvtest::TS::LOG, "bad accuracy of cvBackProjectPCA() (CV_PCA_DATA_AS_ROW); err = %f\n", err );
@ -495,14 +495,14 @@ protected:
        cvProjectPCA( &_testPoints, &_avg, &_evec, &_prjTestPoints );
        cvBackProjectPCA( &_prjTestPoints, &_avg, &_evec, &_backPrjTestPoints );

-        err = norm(cv::abs(prjTestPoints), cv::abs(rPrjTestPoints.t()), CV_RELATIVE_L2 );
+        err = cvtest::norm(cv::abs(prjTestPoints), cv::abs(rPrjTestPoints.t()), CV_RELATIVE_L2 );
        if( err > diffPrjEps )
        {
            ts->printf( cvtest::TS::LOG, "bad accuracy of cvProjectPCA() (CV_PCA_DATA_AS_COL); err = %f\n", err );
            ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
            return;
        }
-        err = norm(backPrjTestPoints, rBackPrjTestPoints.t(), CV_RELATIVE_L2);
+        err = cvtest::norm(backPrjTestPoints, rBackPrjTestPoints.t(), CV_RELATIVE_L2);
        if( err > diffBackPrjEps )
        {
            ts->printf( cvtest::TS::LOG, "bad accuracy of cvBackProjectPCA() (CV_PCA_DATA_AS_COL); err = %f\n", err );
@ -518,19 +518,19 @@ protected:
        PCA lPCA;
        fs.open( "PCA_store.yml", FileStorage::READ );
        lPCA.read( fs.root() );
-        err = norm( rPCA.eigenvectors, lPCA.eigenvectors, CV_RELATIVE_L2 );
+        err = cvtest::norm( rPCA.eigenvectors, lPCA.eigenvectors, CV_RELATIVE_L2 );
        if( err > 0 )
        {
            ts->printf( cvtest::TS::LOG, "bad accuracy of write/load functions (YML); err = %f\n", err );
            ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
        }
-        err = norm( rPCA.eigenvalues, lPCA.eigenvalues, CV_RELATIVE_L2 );
+        err = cvtest::norm( rPCA.eigenvalues, lPCA.eigenvalues, CV_RELATIVE_L2 );
        if( err > 0 )
        {
            ts->printf( cvtest::TS::LOG, "bad accuracy of write/load functions (YML); err = %f\n", err );
            ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
        }
-        err = norm( rPCA.mean, lPCA.mean, CV_RELATIVE_L2 );
+        err = cvtest::norm( rPCA.mean, lPCA.mean, CV_RELATIVE_L2 );
        if( err > 0 )
        {
            ts->printf( cvtest::TS::LOG, "bad accuracy of write/load functions (YML); err = %f\n", err );
@ -731,9 +731,9 @@ void Core_ArrayOpTest::run( int /* start_from */)
        }

        minMaxLoc(_all_vals, &min_val, &max_val);
-        double _norm0 = norm(_all_vals, CV_C);
-        double _norm1 = norm(_all_vals, CV_L1);
-        double _norm2 = norm(_all_vals, CV_L2);
+        double _norm0 = cvtest::norm(_all_vals, CV_C);
+        double _norm1 = cvtest::norm(_all_vals, CV_L1);
+        double _norm2 = cvtest::norm(_all_vals, CV_L2);

        for( i = 0; i < nz0; i++ )
        {
--- a/modules/core/test/test_math.cpp
+++ b/modules/core/test/test_math.cpp
@ -2433,7 +2433,7 @@ protected:
        }

        Mat convertedRes = resInRad * 180. / CV_PI;
-        double normDiff = norm(convertedRes - resInDeg, NORM_INF);
+        double normDiff = cvtest::norm(convertedRes - resInDeg, NORM_INF);
        if(normDiff > FLT_EPSILON * 180.)
        {
            ts->printf(cvtest::TS::LOG, "There are incorrect result angles (in radians)\n");
@ -2569,11 +2569,11 @@ TEST(Core_Invert, small)
    cv::Mat b = a.t()*a;
    cv::Mat c, i = Mat_<float>::eye(3, 3);
    cv::invert(b, c, cv::DECOMP_LU); //std::cout << b*c << std::endl;
-    ASSERT_LT( cv::norm(b*c, i, CV_C), 0.1 );
+    ASSERT_LT( cvtest::norm(b*c, i, CV_C), 0.1 );
    cv::invert(b, c, cv::DECOMP_SVD); //std::cout << b*c << std::endl;
-    ASSERT_LT( cv::norm(b*c, i, CV_C), 0.1 );
+    ASSERT_LT( cvtest::norm(b*c, i, CV_C), 0.1 );
    cv::invert(b, c, cv::DECOMP_CHOLESKY); //std::cout << b*c << std::endl;
-    ASSERT_LT( cv::norm(b*c, i, CV_C), 0.1 );
+    ASSERT_LT( cvtest::norm(b*c, i, CV_C), 0.1 );
 }

 /////////////////////////////////////////////////////////////////////////////////////////////////////
@ -2621,7 +2621,7 @@ TEST(Core_SVD, flt)
    Mat X, B1;
    solve(A, B, X, DECOMP_SVD);
    B1 = A*X;
-    EXPECT_LE(norm(B1, B, NORM_L2 + NORM_RELATIVE), FLT_EPSILON*10);
+    EXPECT_LE(cvtest::norm(B1, B, NORM_L2 + NORM_RELATIVE), FLT_EPSILON*10);
 }


--- a/modules/core/test/test_operations.cpp
+++ b/modules/core/test/test_operations.cpp
@ -83,11 +83,11 @@ protected:

    void checkDiff(const Mat& m1, const Mat& m2, const string& s)
    {
-        if (norm(m1, m2, NORM_INF) != 0) throw test_excep(s);
+        if (cvtest::norm(m1, m2, NORM_INF) != 0) throw test_excep(s);
    }
    void checkDiffF(const Mat& m1, const Mat& m2, const string& s)
    {
-        if (norm(m1, m2, NORM_INF) > 1e-5) throw test_excep(s);
+        if (cvtest::norm(m1, m2, NORM_INF) > 1e-5) throw test_excep(s);
    }
 };

@ -488,7 +488,7 @@ bool CV_OperationsTest::TestSubMatAccess()
            coords.push_back(T_bs(i));
            //std::cout << T_bs1(i) << std::endl;
        }
-        CV_Assert( norm(coords, T_bs.reshape(1,1), NORM_INF) == 0 );
+        CV_Assert( cvtest::norm(coords, T_bs.reshape(1,1), NORM_INF) == 0 );
    }
    catch (const test_excep& e)
    {
@ -776,14 +776,14 @@ bool CV_OperationsTest::TestTemplateMat()
        mvf.push_back(Mat_<float>::zeros(4, 3));
        merge(mvf, mf2);
        split(mf2, mvf2);
-        CV_Assert( norm(mvf2[0], mvf[0], CV_C) == 0 &&
-                  norm(mvf2[1], mvf[1], CV_C) == 0 );
+        CV_Assert( cvtest::norm(mvf2[0], mvf[0], CV_C) == 0 &&
+                  cvtest::norm(mvf2[1], mvf[1], CV_C) == 0 );

        {
        Mat a(2,2,CV_32F,1.f);
        Mat b(1,2,CV_32F,1.f);
        Mat c = (a*b.t()).t();
-        CV_Assert( norm(c, CV_L1) == 4. );
+        CV_Assert( cvtest::norm(c, CV_L1) == 4. );
        }

        bool badarg_catched = false;
@ -988,7 +988,7 @@ bool CV_OperationsTest::operations1()

        Vec<double,10> v10dzero;
        for (int ii = 0; ii < 10; ++ii) {
-            if (!v10dzero[ii] == 0.0)
+            if (v10dzero[ii] != 0.0)
                throw test_excep();
        }

@ -1014,13 +1014,13 @@ bool CV_OperationsTest::operations1()
        Matx33f b(1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f);
        Mat c;
        add(Mat::zeros(3, 3, CV_32F), b, c);
-        CV_Assert( norm(b, c, CV_C) == 0 );
+        CV_Assert( cvtest::norm(b, c, CV_C) == 0 );

        add(Mat::zeros(3, 3, CV_64F), b, c, noArray(), c.type());
-        CV_Assert( norm(b, c, CV_C) == 0 );
+        CV_Assert( cvtest::norm(b, c, CV_C) == 0 );

        add(Mat::zeros(6, 1, CV_64F), 1, c, noArray(), c.type());
-        CV_Assert( norm(Matx61f(1.f, 1.f, 1.f, 1.f, 1.f, 1.f), c, CV_C) == 0 );
+        CV_Assert( cvtest::norm(Matx61f(1.f, 1.f, 1.f, 1.f, 1.f, 1.f), c, CV_C) == 0 );

        vector<Point2f> pt2d(3);
        vector<Point3d> pt3d(2);
@ -1066,11 +1066,11 @@ bool CV_OperationsTest::TestSVD()
        Mat A = (Mat_<double>(3,4) << 1, 2, -1, 4, 2, 4, 3, 5, -1, -2, 6, 7);
        Mat x;
        SVD::solveZ(A,x);
-        if( norm(A*x, CV_C) > FLT_EPSILON )
+        if( cvtest::norm(A*x, CV_C) > FLT_EPSILON )
            throw test_excep();

        SVD svd(A, SVD::FULL_UV);
-        if( norm(A*svd.vt.row(3).t(), CV_C) > FLT_EPSILON )
+        if( cvtest::norm(A*svd.vt.row(3).t(), CV_C) > FLT_EPSILON )
            throw test_excep();

        Mat Dp(3,3,CV_32FC1);
@ -1094,11 +1094,11 @@ bool CV_OperationsTest::TestSVD()
        W=decomp.w;
        Mat I = Mat::eye(3, 3, CV_32F);

-        if( norm(U*U.t(), I, CV_C) > FLT_EPSILON ||
-            norm(Vt*Vt.t(), I, CV_C) > FLT_EPSILON ||
+        if( cvtest::norm(U*U.t(), I, CV_C) > FLT_EPSILON ||
+            cvtest::norm(Vt*Vt.t(), I, CV_C) > FLT_EPSILON ||
            W.at<float>(2) < 0 || W.at<float>(1) < W.at<float>(2) ||
            W.at<float>(0) < W.at<float>(1) ||
-            norm(U*Mat::diag(W)*Vt, Q, CV_C) > FLT_EPSILON )
+            cvtest::norm(U*Mat::diag(W)*Vt, Q, CV_C) > FLT_EPSILON )
            throw test_excep();
    }
    catch(const test_excep&)
--- a/modules/core/test/test_rand.cpp
+++ b/modules/core/test/test_rand.cpp
@ -174,7 +174,7 @@ void Core_RandTest::run( int )
            }
        }

-        if( maxk >= 1 && norm(arr[0], arr[1], NORM_INF) > eps)
+        if( maxk >= 1 && cvtest::norm(arr[0], arr[1], NORM_INF) > eps)
        {
            ts->printf( cvtest::TS::LOG, "RNG output depends on the array lengths (some generated numbers get lost?)" );
            ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
--- a/modules/core/test/test_umat.cpp
+++ b/modules/core/test/test_umat.cpp
@ -563,12 +563,12 @@ protected:

    void checkDiff(const Mat& m1, const Mat& m2, const string& s)
    {
-        if (norm(m1, m2, NORM_INF) != 0)
+        if (cvtest::norm(m1, m2, NORM_INF) != 0)
            throw test_excep(s);
    }
    void checkDiffF(const Mat& m1, const Mat& m2, const string& s)
    {
-        if (norm(m1, m2, NORM_INF) > 1e-5)
+        if (cvtest::norm(m1, m2, NORM_INF) > 1e-5)
            throw test_excep(s);
    }
 };
@ -721,7 +721,7 @@ TEST(Core_UMat, getUMat)
            um.setTo(17);
        }

-        double err = norm(m, ref, NORM_INF);
+        double err = cvtest::norm(m, ref, NORM_INF);
        if (err > 0)
        {
            std::cout << "m: " << std::endl << m << std::endl;
@ -742,7 +742,7 @@ TEST(UMat, Sync)

    um.setTo(cv::Scalar::all(19));

-    EXPECT_EQ(0, cv::norm(um.getMat(ACCESS_READ), cv::Mat(um.size(), um.type(), 19), NORM_INF));
+    EXPECT_EQ(0, cvtest::norm(um.getMat(ACCESS_READ), cv::Mat(um.size(), um.type(), 19), NORM_INF));
 }

 TEST(UMat, setOpenCL)
--- a/modules/cuda/doc/introduction.rst
+++ b/modules/cuda/doc/introduction.rst
@ -45,7 +45,7 @@ Utilizing Multiple GPUs
 -----------------------

 In the current version, each of the OpenCV CUDA algorithms can use only a single GPU. So, to utilize multiple GPUs, you have to manually distribute the work between GPUs.
-Switching active devie can be done using :ocv:func:`cuda::setDevice()` function.  For more details please read Cuda C Programing Guide.
+Switching the active device can be done using the :ocv:func:`cuda::setDevice()` function.  For more details, please read the CUDA C Programming Guide.

 While developing algorithms for multiple GPUs, note a data passing overhead. For primitive functions and small images, it can be significant, which may eliminate all the advantages of having multiple GPUs. But for high-level algorithms, consider using multi-GPU acceleration. For example, the Stereo Block Matching algorithm has been successfully parallelized using the following algorithm:

--- a/modules/cudabgsegm/test/test_bgsegm.cpp
+++ b/modules/cudabgsegm/test/test_bgsegm.cpp
@ -323,7 +323,7 @@ CUDA_TEST_P(MOG2, getBackgroundImage)
    cv::Mat background_gold;
    mog2_gold->getBackgroundImage(background_gold);

-    ASSERT_MAT_NEAR(background_gold, background, 0);
+    ASSERT_MAT_NEAR(background_gold, background, 1);
 }

 INSTANTIATE_TEST_CASE_P(CUDA_BgSegm, MOG2, testing::Combine(
--- a/modules/features2d/doc/common_interfaces_of_descriptor_extractors.rst
+++ b/modules/features2d/doc/common_interfaces_of_descriptor_extractors.rst
@ -69,7 +69,7 @@ Computes the descriptors for a set of keypoints detected in an image (first vari

    :param keypoints: Input collection of keypoints. Keypoints for which a descriptor cannot be computed are removed. Sometimes new keypoints can be added, for example: ``SIFT`` duplicates keypoint with several dominant orientations (for each orientation).

-    :param descriptors: Computed descriptors. In the second variant of the method ``descriptors[i]`` are descriptors computed for a ``keypoints[i]`. Row ``j`` is the ``keypoints`` (or ``keypoints[i]``) is the descriptor for keypoint ``j``-th keypoint.
+    :param descriptors: Computed descriptors. In the second variant of the method, ``descriptors[i]`` are the descriptors computed for ``keypoints[i]``. Row ``j`` of ``descriptors`` (or ``descriptors[i]``) is the descriptor for the ``j``-th keypoint in ``keypoints`` (or ``keypoints[i]``).


 DescriptorExtractor::create
--- a/modules/features2d/doc/common_interfaces_of_descriptor_matchers.rst
+++ b/modules/features2d/doc/common_interfaces_of_descriptor_matchers.rst
@ -249,7 +249,7 @@ Brute-force matcher constructor.

    :param normType: One of ``NORM_L1``, ``NORM_L2``, ``NORM_HAMMING``, ``NORM_HAMMING2``. ``L1`` and ``L2`` norms are preferable choices for SIFT and SURF descriptors, ``NORM_HAMMING`` should be used with ORB, BRISK and BRIEF, ``NORM_HAMMING2`` should be used with ORB when ``WTA_K==3`` or ``4`` (see ORB::ORB constructor description).

-    :param crossCheck: If it is false, this is will be default BFMatcher behaviour when it finds the k nearest neighbors for each query descriptor. If ``crossCheck==true``, then the ``knnMatch()`` method with ``k=1`` will only return pairs ``(i,j)`` such that for ``i-th`` query descriptor the ``j-th`` descriptor in the matcher's collection is the nearest and vice versa, i.e. the ``BFMathcher`` will only return consistent pairs. Such technique usually produces best results with minimal number of outliers when there are enough matches. This is alternative to the ratio test, used by D. Lowe in SIFT paper.
+    :param crossCheck: If it is false, this will be the default BFMatcher behaviour when it finds the k nearest neighbors for each query descriptor. If ``crossCheck==true``, then the ``knnMatch()`` method with ``k=1`` will only return pairs ``(i,j)`` such that for the ``i-th`` query descriptor the ``j-th`` descriptor in the matcher's collection is the nearest and vice versa, i.e. the ``BFMatcher`` will only return consistent pairs. Such a technique usually produces the best results with a minimal number of outliers when there are enough matches. This is an alternative to the ratio test used by D. Lowe in the SIFT paper.


 FlannBasedMatcher
--- a/modules/features2d/perf/opencl/perf_brute_force_matcher.cpp
+++ b/modules/features2d/perf/opencl/perf_brute_force_matcher.cpp
@ -123,7 +123,7 @@ OCL_PERF_TEST_P(BruteForceMatcherFixture, RadiusMatch, ::testing::Combine(OCL_PE
    SANITY_CHECK_MATCHES(matches1, 1e-3);
 }

-}//ocl
-}//cvtest
+} // ocl
+} // cvtest

-#endif //HAVE_OPENCL
+#endif // HAVE_OPENCL
--- a/modules/features2d/perf/opencl/perf_fast.cpp
+++ b/modules/features2d/perf/opencl/perf_fast.cpp
@ -0,0 +1,50 @@
+#include "perf_precomp.hpp"
+#include "opencv2/ts/ocl_perf.hpp"
+
+#ifdef HAVE_OPENCL
+
+namespace cvtest {
+namespace ocl {
+
+enum { TYPE_5_8 =FastFeatureDetector::TYPE_5_8, TYPE_7_12 = FastFeatureDetector::TYPE_7_12, TYPE_9_16 = FastFeatureDetector::TYPE_9_16 };
+CV_ENUM(FastType, TYPE_5_8, TYPE_7_12)
+
+typedef std::tr1::tuple<string, FastType> File_Type_t;
+typedef TestBaseWithParam<File_Type_t> FASTFixture;
+
+#define FAST_IMAGES \
+    "cv/detectors_descriptors_evaluation/images_datasets/leuven/img1.png",\
+    "stitching/a3.png"
+
+OCL_PERF_TEST_P(FASTFixture, FastDetect, testing::Combine(
+                            testing::Values(FAST_IMAGES),
+                            FastType::all()
+                          ))
+{
+    string filename = getDataPath(get<0>(GetParam()));
+    int type = get<1>(GetParam());
+    Mat mframe = imread(filename, IMREAD_GRAYSCALE);
+
+    if (mframe.empty())
+        FAIL() << "Unable to load source image " << filename;
+
+    UMat frame;
+    mframe.copyTo(frame);
+    declare.in(frame);
+
+    Ptr<FeatureDetector> fd = Algorithm::create<FeatureDetector>("Feature2D.FAST");
+    ASSERT_FALSE( fd.empty() );
+    fd->set("threshold", 20);
+    fd->set("nonmaxSuppression", true);
+    fd->set("type", type);
+    vector<KeyPoint> points;
+
+    OCL_TEST_CYCLE() fd->detect(frame, points);
+
+    SANITY_CHECK_KEYPOINTS(points);
+}
+
+} // ocl
+} // cvtest
+
+#endif // HAVE_OPENCL
--- a/modules/features2d/perf/opencl/perf_orb.cpp
+++ b/modules/features2d/perf/opencl/perf_orb.cpp
@ -0,0 +1,86 @@
+#include "perf_precomp.hpp"
+#include "opencv2/ts/ocl_perf.hpp"
+
+#ifdef HAVE_OPENCL
+
+namespace cvtest {
+namespace ocl {
+
+typedef ::perf::TestBaseWithParam<std::string> ORBFixture;
+
+#define ORB_IMAGES OCL_PERF_ENUM("cv/detectors_descriptors_evaluation/images_datasets/leuven/img1.png", "stitching/a3.png")
+
+OCL_PERF_TEST_P(ORBFixture, ORB_Detect, ORB_IMAGES)
+{
+    string filename = getDataPath(GetParam());
+    Mat mframe = imread(filename, IMREAD_GRAYSCALE);
+
+    if (mframe.empty())
+        FAIL() << "Unable to load source image " << filename;
+
+    UMat frame, mask;
+    mframe.copyTo(frame);
+
+    declare.in(frame);
+    ORB detector(1500, 1.3f, 1);
+    vector<KeyPoint> points;
+
+    OCL_TEST_CYCLE() detector(frame, mask, points);
+
+    std::sort(points.begin(), points.end(), comparators::KeypointGreater());
+    SANITY_CHECK_KEYPOINTS(points, 1e-5);
+}
+
+OCL_PERF_TEST_P(ORBFixture, ORB_Extract, ORB_IMAGES)
+{
+    string filename = getDataPath(GetParam());
+    Mat mframe = imread(filename, IMREAD_GRAYSCALE);
+
+    if (mframe.empty())
+        FAIL() << "Unable to load source image " << filename;
+
+    UMat mask, frame;
+    mframe.copyTo(frame);
+
+    declare.in(frame);
+
+    ORB detector(1500, 1.3f, 1);
+    vector<KeyPoint> points;
+    detector(frame, mask, points);
+    std::sort(points.begin(), points.end(), comparators::KeypointGreater());
+
+    UMat descriptors;
+
+    OCL_TEST_CYCLE() detector(frame, mask, points, descriptors, true);
+
+    SANITY_CHECK(descriptors);
+}
+
+OCL_PERF_TEST_P(ORBFixture, ORB_Full, ORB_IMAGES)
+{
+    string filename = getDataPath(GetParam());
+    Mat mframe = imread(filename, IMREAD_GRAYSCALE);
+
+    if (mframe.empty())
+        FAIL() << "Unable to load source image " << filename;
+
+    UMat mask, frame;
+    mframe.copyTo(frame);
+
+    declare.in(frame);
+    ORB detector(1500, 1.3f, 1);
+
+    vector<KeyPoint> points;
+    UMat descriptors;
+
+    OCL_TEST_CYCLE() detector(frame, mask, points, descriptors, false);
+
+    ::perf::sort(points, descriptors);
+    SANITY_CHECK_KEYPOINTS(points, 1e-5);
+    SANITY_CHECK(descriptors);
+}
+
+} // ocl
+} // cvtest
+
+#endif // HAVE_OPENCL
--- a/modules/features2d/test/test_fast.cpp
+++ b/modules/features2d/test/test_fast.cpp
@ -119,8 +119,8 @@ void CV_FastTest::run( int )
    read( fs["exp_kps2"], exp_kps2, Mat() );
    fs.release();

-    if ( exp_kps1.size != kps1.size || 0 != norm(exp_kps1, kps1, NORM_L2) ||
-         exp_kps2.size != kps2.size || 0 != norm(exp_kps2, kps2, NORM_L2))
+     if ( exp_kps1.size != kps1.size || 0 != cvtest::norm(exp_kps1, kps1, NORM_L2) ||
+          exp_kps2.size != kps2.size || 0 != cvtest::norm(exp_kps2, kps2, NORM_L2))
    {
        ts->set_failed_test_info(cvtest::TS::FAIL_MISMATCH);
        return;
--- a/modules/features2d/test/test_nearestneighbors.cpp
+++ b/modules/features2d/test/test_nearestneighbors.cpp
@ -193,8 +193,8 @@ int CV_KDTreeTest_CPP::checkGetPoins( const Mat& data )
    // 3d way
    tr->getPoints( idxs, res3 );

-    if( norm( res1, data, NORM_L1) != 0 ||
-        norm( res3, data, NORM_L1) != 0)
+    if( cvtest::norm( res1, data, NORM_L1) != 0 ||
+        cvtest::norm( res3, data, NORM_L1) != 0)
        return cvtest::TS::FAIL_BAD_ACCURACY;
    return cvtest::TS::OK;
 }
@ -232,7 +232,7 @@ int CV_KDTreeTest_CPP::findNeighbors( Mat& points, Mat& neighbors )
    }

    // compare results
-    if( norm( neighbors, neighbors2, NORM_L1 ) != 0 )
+    if( cvtest::norm( neighbors, neighbors2, NORM_L1 ) != 0 )
        return cvtest::TS::FAIL_BAD_ACCURACY;

    return cvtest::TS::OK;
@ -284,7 +284,7 @@ int CV_FlannTest::knnSearch( Mat& points, Mat& neighbors )
    }

    // compare results
-    if( norm( neighbors, neighbors1, NORM_L1 ) != 0 )
+    if( cvtest::norm( neighbors, neighbors1, NORM_L1 ) != 0 )
        return cvtest::TS::FAIL_BAD_ACCURACY;

    return cvtest::TS::OK;
@ -316,7 +316,7 @@ int CV_FlannTest::radiusSearch( Mat& points, Mat& neighbors )
            neighbors1.at<int>(i,j) = *it;
    }
    // compare results
-    if( norm( neighbors, neighbors1, NORM_L1 ) != 0 )
+    if( cvtest::norm( neighbors, neighbors1, NORM_L1 ) != 0 )
        return cvtest::TS::FAIL_BAD_ACCURACY;

    return cvtest::TS::OK;
--- a/modules/highgui/include/opencv2/highgui.hpp
+++ b/modules/highgui/include/opencv2/highgui.hpp
@ -217,6 +217,7 @@ enum { IMREAD_UNCHANGED  = -1, // 8bit, color or not

 enum { IMWRITE_JPEG_QUALITY     = 1,
       IMWRITE_JPEG_PROGRESSIVE = 2,
+       IMWRITE_JPEG_OPTIMIZE    = 3,
       IMWRITE_PNG_COMPRESSION  = 16,
       IMWRITE_PNG_STRATEGY     = 17,
       IMWRITE_PNG_BILEVEL      = 18,
--- a/modules/highgui/include/opencv2/highgui/highgui_c.h
+++ b/modules/highgui/include/opencv2/highgui/highgui_c.h
@ -221,6 +221,7 @@ enum
 {
    CV_IMWRITE_JPEG_QUALITY =1,
    CV_IMWRITE_JPEG_PROGRESSIVE =2,
+    CV_IMWRITE_JPEG_OPTIMIZE =3,
    CV_IMWRITE_PNG_COMPRESSION =16,
    CV_IMWRITE_PNG_STRATEGY =17,
    CV_IMWRITE_PNG_BILEVEL =18,
--- a/modules/highgui/src/cap_avfoundation.mm
+++ b/modules/highgui/src/cap_avfoundation.mm
@ -1309,6 +1309,8 @@ bool CvVideoWriter_AVFoundation::writeFrame(const IplImage* iplimage) {
    }

    //cleanup
+    CFRelease(cfData);
+    CVPixelBufferRelease(pixelBuffer);
    CGImageRelease(cgImage);
    CGDataProviderRelease(provider);
    CGColorSpaceRelease(colorSpace);
--- a/modules/highgui/src/grfmt_jpeg.cpp
+++ b/modules/highgui/src/grfmt_jpeg.cpp
@ -599,6 +599,7 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )

        int quality = 95;
        int progressive = 0;
+        int optimize = 0;

        for( size_t i = 0; i < params.size(); i += 2 )
        {
@ -612,6 +613,11 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
            {
                progressive = params[i+1];
            }
+
+            if( params[i] == CV_IMWRITE_JPEG_OPTIMIZE )
+            {
+                optimize = params[i+1];
+            }
        }

        jpeg_set_defaults( &cinfo );
@ -619,6 +625,8 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
                          TRUE /* limit to baseline-JPEG values */ );
        if( progressive )
            jpeg_simple_progression( &cinfo );
+        if( optimize )
+            cinfo.optimize_coding = TRUE;
        jpeg_start_compress( &cinfo, TRUE );

        if( channels > 1 )
--- a/modules/highgui/test/test_drawing.cpp
+++ b/modules/highgui/test/test_drawing.cpp
@ -76,7 +76,7 @@ void CV_DrawingTest::run( int )
    }
    else
    {
-        float err = (float)norm( testImg, valImg, CV_RELATIVE_L1 );
+        float err = (float)cvtest::norm( testImg, valImg, CV_RELATIVE_L1 );
        float Eps = 0.9f;
        if( err > Eps)
        {
@ -229,7 +229,7 @@ int CV_DrawingTest_CPP::checkLineIterator( Mat& img )
    for(int i = 0; i < it.count; ++it, i++ )
    {
        Vec3b v = (Vec3b)(*(*it)) - img.at<Vec3b>(300,i);
-        float err = (float)norm( v );
+        float err = (float)cvtest::norm( v, NORM_L2 );
        if( err != 0 )
        {
            ts->printf( ts->LOG, "LineIterator works incorrect" );
@ -395,7 +395,7 @@ int CV_DrawingTest_C::checkLineIterator( Mat& _img )
    for(int i = 0; i < count; i++ )
    {
        Vec3b v = (Vec3b)(*(it.ptr)) - _img.at<Vec3b>(300,i);
-        float err = (float)norm( v );
+        float err = (float)cvtest::norm( v, NORM_L2 );
        if( err != 0 )
        {
            ts->printf( ts->LOG, "CvLineIterator works incorrect" );
--- a/modules/highgui/test/test_ffmpeg.cpp
+++ b/modules/highgui/test/test_ffmpeg.cpp
@ -163,7 +163,7 @@ public:

            CV_Assert( !img0.empty() && !img.empty() && img_next.empty() );

-            double diff = norm(img0, img, CV_C);
+            double diff = cvtest::norm(img0, img, CV_C);
            CV_Assert( diff == 0 );
        }
        catch(...)
--- a/modules/highgui/test/test_grfmt.cpp
+++ b/modules/highgui/test/test_grfmt.cpp
@ -121,7 +121,7 @@ public:
                        CV_Assert(img.type() == img_test.type());
                        CV_Assert(num_channels == img_test.channels());

-                        double n = norm(img, img_test);
+                        double n = cvtest::norm(img, img_test, NORM_L2);
                        if ( n > 1.0)
                        {
                            ts->printf(ts->LOG, "norm = %f \n", n);
@ -151,7 +151,7 @@ public:
                    CV_Assert(img.size() == img_test.size());
                    CV_Assert(img.type() == img_test.type());

-                    double n = norm(img, img_test);
+                    double n = cvtest::norm(img, img_test, NORM_L2);
                    if ( n > 1.0)
                    {
                        ts->printf(ts->LOG, "norm = %f \n", n);
@ -183,7 +183,7 @@ public:
                    CV_Assert(img.type() == img_test.type());


-                    double n = norm(img, img_test);
+                    double n = cvtest::norm(img, img_test, NORM_L2);
                    if ( n > 1.0)
                    {
                        ts->printf(ts->LOG, "norm = %f \n", n);
@ -210,7 +210,7 @@ public:
        {
            Mat rle = imread(string(ts->get_data_path()) + "readwrite/rle8.bmp");
            Mat bmp = imread(string(ts->get_data_path()) + "readwrite/ordinary.bmp");
-            if (norm(rle-bmp)>1.e-10)
+            if (cvtest::norm(rle-bmp, NORM_L2)>1.e-10)
                ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY);
        }
        catch(...)
@ -406,10 +406,34 @@ TEST(Highgui_Jpeg, encode_decode_progressive_jpeg)
    EXPECT_NO_THROW(cv::imwrite(output_normal, img));
    cv::Mat img_jpg_normal = cv::imread(output_normal);

-    EXPECT_EQ(0, cv::norm(img_jpg_progressive, img_jpg_normal, NORM_INF));
+    EXPECT_EQ(0, cvtest::norm(img_jpg_progressive, img_jpg_normal, NORM_INF));

    remove(output_progressive.c_str());
 }
+
+// Verifies that IMWRITE_JPEG_OPTIMIZE does not change the decoded pixels:
+// lena.png is written once with the flag set to 1 and once with the default
+// parameters, both files are read back, and their NORM_INF difference must be 0.
+TEST(Highgui_Jpeg, encode_decode_optimize_jpeg)
+{
+    cvtest::TS& ts = *cvtest::TS::ptr();
+    string input = string(ts.get_data_path()) + "../cv/shared/lena.png";
+    cv::Mat img = cv::imread(input);
+    ASSERT_FALSE(img.empty());
+
+    // Encoder parameters are passed as flat (key, value) pairs.
+    std::vector<int> params;
+    params.push_back(IMWRITE_JPEG_OPTIMIZE);
+    params.push_back(1);
+
+    string output_optimized = cv::tempfile(".jpg");
+    EXPECT_NO_THROW(cv::imwrite(output_optimized, img, params));
+    cv::Mat img_jpg_optimized = cv::imread(output_optimized);
+
+    string output_normal = cv::tempfile(".jpg");
+    EXPECT_NO_THROW(cv::imwrite(output_normal, img));
+    cv::Mat img_jpg_normal = cv::imread(output_normal);
+
+    EXPECT_EQ(0, cvtest::norm(img_jpg_optimized, img_jpg_normal, NORM_INF));
+
+    // Remove both temporary files so the test leaves nothing in the temp directory.
+    remove(output_optimized.c_str());
+    remove(output_normal.c_str());
+}
 #endif


@ -588,11 +612,11 @@ TEST(Highgui_WebP, encode_decode_lossless_webp)

    cv::Mat decode = cv::imdecode(buf, IMREAD_COLOR);
    ASSERT_FALSE(decode.empty());
-    EXPECT_TRUE(cv::norm(decode, img_webp, NORM_INF) == 0);
+    EXPECT_TRUE(cvtest::norm(decode, img_webp, NORM_INF) == 0);

    ASSERT_FALSE(img_webp.empty());

-    EXPECT_TRUE(cv::norm(img, img_webp, NORM_INF) == 0);
+    EXPECT_TRUE(cvtest::norm(img, img_webp, NORM_INF) == 0);
 }

 TEST(Highgui_WebP, encode_decode_lossy_webp)
--- a/modules/imgproc/doc/feature_detection.rst
+++ b/modules/imgproc/doc/feature_detection.rst
@ -1,4 +1,4 @@
-Feature Detection
+Feature Detection
 =================

 .. highlight:: cpp
@ -15,9 +15,9 @@ Finds edges in an image using the [Canny86]_ algorithm.

 .. ocv:cfunction:: void cvCanny( const CvArr* image, CvArr* edges, double threshold1, double threshold2, int aperture_size=3 )

-    :param image: single-channel 8-bit input image.
+    :param image: 8-bit input image.

-    :param edges: output edge map; it has the same size and type as  ``image`` .
+    :param edges: output edge map; single-channel 8-bit image, which has the same size as  ``image`` .

    :param threshold1: first threshold for the hysteresis procedure.

--- a/modules/imgproc/perf/perf_bilateral.cpp
+++ b/modules/imgproc/perf/perf_bilateral.cpp
@ -34,5 +34,5 @@ PERF_TEST_P( TestBilateralFilter, BilateralFilter,

    TEST_CYCLE() bilateralFilter(src, dst, d, sigmaColor, sigmaSpace, BORDER_DEFAULT);

-    SANITY_CHECK(dst);
+    SANITY_CHECK(dst, .01, ERROR_RELATIVE);
 }
--- a/modules/imgproc/src/canny.cpp
+++ b/modules/imgproc/src/canny.cpp
@ -42,13 +42,13 @@
 #include "precomp.hpp"
 #include "opencl_kernels.hpp"

-/*
+
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
 #define USE_IPP_CANNY 1
 #else
 #undef USE_IPP_CANNY
 #endif
-*/
+

 namespace cv
 {
@ -81,8 +81,8 @@ static bool ippCanny(const Mat& _src, Mat& _dst, float low,  float high)
        return false;

    if( ippiCanny_16s8u_C1R(_dx.ptr<short>(), (int)_dx.step,
-                            _dy.ptr<short>(), (int)_dy.step,
-                            _dst.data, (int)_dst.step, roi, low, high, buffer) < 0 )
+                               _dy.ptr<short>(), (int)_dy.step,
+                              _dst.data, (int)_dst.step, roi, low, high, buffer) < 0 )
        return false;
    return true;
 }
@ -286,7 +286,7 @@ void cv::Canny( InputArray _src, OutputArray _dst,
 #endif

 #ifdef USE_IPP_CANNY
-    if( aperture_size == 3 && !L2gradient &&
+    if( aperture_size == 3 && !L2gradient && 1 == cn &&
        ippCanny(src, dst, (float)low_thresh, (float)high_thresh) )
        return;
 #endif
--- a/modules/imgproc/src/color.cpp
+++ b/modules/imgproc/src/color.cpp
@ -252,6 +252,7 @@ bool CvtColorIPPLoopCopy(Mat& src, Mat& dst, const Cvt& cvt)
    }
    bool ok;
    parallel_for_(Range(0, source.rows), CvtColorIPPLoop_Invoker<Cvt>(source, dst, cvt, &ok), source.total()/(double)(1<<16) );
+    //ok = cvt(src.ptr<uchar>(0), (int)src.step[0], dst.ptr<uchar>(0), (int)dst.step[0], src.cols, src.rows);
    return ok;
 }

@ -297,11 +298,13 @@ static ippiReorderFunc ippiSwapChannelsC3RTab[] =
    0, (ippiReorderFunc)ippiSwapChannels_32f_C3R, 0, 0
 };

+#if (IPP_VERSION_X100 >= 801)
 static ippiReorderFunc ippiSwapChannelsC4RTab[] =
 {
-    (ippiReorderFunc)ippiSwapChannels_8u_AC4R, 0, (ippiReorderFunc)ippiSwapChannels_16u_AC4R, 0,
-    0, (ippiReorderFunc)ippiSwapChannels_32f_AC4R, 0, 0
+    (ippiReorderFunc)ippiSwapChannels_8u_C4R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4R, 0,
+    0, (ippiReorderFunc)ippiSwapChannels_32f_C4R, 0, 0
 };
+#endif

 static ippiColor2GrayFunc ippiColor2GrayC3Tab[] =
 {
@ -3251,11 +3254,13 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
                if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC3RTab[depth], 2, 1, 0)) )
                    return;
            }
+#if (IPP_VERSION_X100 >= 801)
            else if( code == CV_RGBA2BGRA )
            {
                if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC4RTab[depth], 2, 1, 0)) )
                    return;
            }
+#endif
 #endif

            if( depth == CV_8U )
@ -3310,14 +3315,17 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
            CV_Assert( scn == 3 || scn == 4 );
            _dst.create(sz, CV_MAKETYPE(depth, 1));
            dst = _dst.getMat();
-/*
+/**/
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+/*
            if( code == CV_BGR2GRAY )
            {
                if( CvtColorIPPLoop(src, dst, IPPColor2GrayFunctor(ippiColor2GrayC3Tab[depth])) )
                    return;
            }
-            else if( code == CV_RGB2GRAY )
+            else
+*/
+            if( code == CV_RGB2GRAY )
            {
                if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGB2GrayC3Tab[depth])) )
                    return;
@ -3333,7 +3341,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
                    return;
            }
 #endif
-*/
+/**/
            bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;

            if( depth == CV_8U )
--- a/modules/imgproc/src/deriv.cpp
+++ b/modules/imgproc/src/deriv.cpp
@ -11,6 +11,7 @@
 //                For Open Source Computer Vision Library
 //
 // Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Copyright (C) 2014, Itseez, Inc, all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
@ -40,6 +41,8 @@
 //M*/

 #include "precomp.hpp"
+#include "opencl_kernels.hpp"
+
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
 static IppStatus sts = ippInit();
 #endif
@ -187,223 +190,231 @@ namespace cv

 static bool IPPDerivScharr(const Mat& src, Mat& dst, int ddepth, int dx, int dy, double scale)
 {
-   int bufSize = 0;
-   cv::AutoBuffer<char> buffer;
-   IppiSize roi = ippiSize(src.cols, src.rows);
+    int bufSize = 0;
+    cv::AutoBuffer<char> buffer;
+    IppiSize roi = ippiSize(src.cols, src.rows);

-   if( ddepth < 0 )
-     ddepth = src.depth();
+    if( ddepth < 0 )
+        ddepth = src.depth();

-   dst.create( src.size(), CV_MAKETYPE(ddepth, src.channels()) );
+    dst.create( src.size(), CV_MAKETYPE(ddepth, src.channels()) );

-   switch(src.type())
-   {
-      case CV_8U:
-         {
+    switch(src.type())
+    {
+    case CV_8U:
+        {
            if(scale != 1)
                return false;

            switch(dst.type())
            {
-               case CV_16S:
-               {
-                  if((dx == 1) && (dy == 0))
-                  {
-                     ippiFilterScharrVertGetBufferSize_8u16s_C1R(roi,&bufSize);
-                     buffer.allocate(bufSize);
-
-                     ippiFilterScharrVertBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
-                        (Ipp16s*)dst.data, (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer);
-
-                     return true;
-                  }
+            case CV_16S:
+                {
+                    if ((dx == 1) && (dy == 0))
+                    {
+                        if (0 > ippiFilterScharrVertGetBufferSize_8u16s_C1R(roi,&bufSize))
+                            return false;
+                        buffer.allocate(bufSize);
+                        return (0 <= ippiFilterScharrVertBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
+                                        (Ipp16s*)dst.data, (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
+                    }
+                    if ((dx == 0) && (dy == 1))
+                    {
+                        if (0 > ippiFilterScharrHorizGetBufferSize_8u16s_C1R(roi,&bufSize))
+                            return false;
+                        buffer.allocate(bufSize);
+                        return (0 <= ippiFilterScharrHorizBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
+                                            (Ipp16s*)dst.data, (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
+                    }
+                    return false;
+                }
+            default:
+                return false;
+            }
+        }
+    case CV_32F:
+        {
+            switch(dst.type())
+            {
+            case CV_32F:
+                {
+                    if ((dx == 1) && (dy == 0))
+                    {
+                        if (0 > ippiFilterScharrVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize))
+                            return false;
+                        buffer.allocate(bufSize);
+
+                        if (0 > ippiFilterScharrVertBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
+                                        (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows),
+                                        ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
+                        {
+                            return false;
+                        }
+
+                        if (scale != 1)
+                            /* IPP is fast, so MulC produce very little perf degradation.*/
+                            //ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f*)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
+                            ippiMulC_32f_C1R((Ipp32f*)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f*)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
+                        return true;
+                    }
+                    if ((dx == 0) && (dy == 1))
+                    {
+                        if (0 > ippiFilterScharrHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize))
+                            return false;
+                        buffer.allocate(bufSize);
+
+                        if (0 > ippiFilterScharrHorizBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
+                                        (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows),
+                                        ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
+                            return false;
+
+                        if (scale != 1)
+                            ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
+                        return true;
+                    }
+                }
+            default:
+                return false;
+            }
+        }
+    default:
+        return false;
+    }
+}

-                  if((dx == 0) && (dy == 1))
-                  {
-                     ippiFilterScharrHorizGetBufferSize_8u16s_C1R(roi,&bufSize);
-                     buffer.allocate(bufSize);

-                     ippiFilterScharrHorizBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
-                        (Ipp16s*)dst.data, (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer);
+static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int ksize, double scale)
+{
+    int bufSize = 0;
+    cv::AutoBuffer<char> buffer;
+    if (ksize == 3 || ksize == 5)
+    {
+        if ( ddepth < 0 )
+            ddepth = src.depth();

-                     return true;
-                  }
-               }
+        if (src.type() == CV_8U && dst.type() == CV_16S && scale == 1)
+        {
+            if ((dx == 1) && (dy == 0))
+            {
+                if (0 > ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
+                    return false;
+                buffer.allocate(bufSize);

-               default:
-                  return false;
+                return (0 <= ippiFilterSobelNegVertBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
+                                    (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
+                                    ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
            }
-         }

-      case CV_32F:
-         {
-            switch(dst.type())
+            if ((dx == 0) && (dy == 1))
            {
-               case CV_32F:
-               if((dx == 1) && (dy == 0))
-               {
-                  ippiFilterScharrVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize);
-                  buffer.allocate(bufSize);
-
-                  ippiFilterScharrVertBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
-                     (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows),
-                                            ippBorderRepl, 0, (Ipp8u*)(char*)buffer);
-                  if(scale != 1)
-                     /* IPP is fast, so MulC produce very little perf degradation */
-                     ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f*)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
-
-                  return true;
-               }
-
-               if((dx == 0) && (dy == 1))
-               {
-                  ippiFilterScharrHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize);
-                  buffer.allocate(bufSize);
-
-                  ippiFilterScharrHorizBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
-                     (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows),
-                                            ippBorderRepl, 0, (Ipp8u*)(char*)buffer);
-                  if(scale != 1)
-                     ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
-
-                  return true;
-               }
-
-               default:
-                  return false;
-            }
-         }
+                if (0 > ippiFilterSobelHorizGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
+                    return false;
+                buffer.allocate(bufSize);

-      default:
-         return false;
-   }
-}
+                return (0 <= ippiFilterSobelHorizBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
+                                    (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
+                                    ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
+            }

+            if ((dx == 2) && (dy == 0))
+            {
+                if (0 > ippiFilterSobelVertSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
+                    return false;
+                buffer.allocate(bufSize);

-static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int ksize, double scale)
-{
-   int bufSize = 0;
-   cv::AutoBuffer<char> buffer;
-
-   if(ksize == 3 || ksize == 5)
-   {
-      if( ddepth < 0 )
-          ddepth = src.depth();
-
-      if(src.type() == CV_8U && dst.type() == CV_16S && scale == 1)
-      {
-         if((dx == 1) && (dy == 0))
-         {
-            ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize);
-            buffer.allocate(bufSize);
-
-            ippiFilterSobelNegVertBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
-               (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
-                                      ippBorderRepl, 0, (Ipp8u*)(char*)buffer);
-            return true;
-         }
-
-         if((dx == 0) && (dy == 1))
-         {
-            ippiFilterSobelHorizGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize);
-            buffer.allocate(bufSize);
-
-            ippiFilterSobelHorizBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
-               (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
-                                      ippBorderRepl, 0, (Ipp8u*)(char*)buffer);
-
-            return true;
-         }
-
-         if((dx == 2) && (dy == 0))
-         {
-            ippiFilterSobelVertSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize);
-            buffer.allocate(bufSize);
-
-            ippiFilterSobelVertSecondBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
-               (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
-                                      ippBorderRepl, 0, (Ipp8u*)(char*)buffer);
-
-            return true;
-         }
-
-         if((dx == 0) && (dy == 2))
-         {
-            ippiFilterSobelHorizSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize);
-            buffer.allocate(bufSize);
-
-            ippiFilterSobelHorizSecondBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
-               (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
-                                      ippBorderRepl, 0, (Ipp8u*)(char*)buffer);
-
-            return true;
-         }
-      }
-
-      if(src.type() == CV_32F && dst.type() == CV_32F)
-      {
-         if((dx == 1) && (dy == 0))
-         {
-            ippiFilterSobelNegVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), &bufSize);
-            buffer.allocate(bufSize);
-
-            ippiFilterSobelNegVertBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
-               (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
-                                      ippBorderRepl, 0, (Ipp8u*)(char*)buffer);
-            if(scale != 1)
-               ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
+                return (0 <= ippiFilterSobelVertSecondBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
+                                    (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
+                                    ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
+            }

-            return true;
-         }
+            if ((dx == 0) && (dy == 2))
+            {
+                if (0 > ippiFilterSobelHorizSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
+                    return false;
+                buffer.allocate(bufSize);

-         if((dx == 0) && (dy == 1))
-         {
-            ippiFilterSobelHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize);
-            buffer.allocate(bufSize);
+                return (0 <= ippiFilterSobelHorizSecondBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
+                                    (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
+                                    ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
+            }
+        }

-            ippiFilterSobelHorizBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
-               (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
-                                      ippBorderRepl, 0, (Ipp8u*)(char*)buffer);
-            if(scale != 1)
-               ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
+        if (src.type() == CV_32F && dst.type() == CV_32F)
+        {
+            if ((dx == 1) && (dy == 0))
+            {
+                if (0 > ippiFilterSobelNegVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), &bufSize))
+                    return false;
+                buffer.allocate(bufSize);

-            return true;
-         }
+                if (0 > ippiFilterSobelNegVertBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
+                                (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
+                                ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
+                {
+                    return false;
+                }
+                if(scale != 1)
+                    ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
+                return true;
+            }

-         if((dx == 2) && (dy == 0))
-         {
-            ippiFilterSobelVertSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize);
-            buffer.allocate(bufSize);
+            if ((dx == 0) && (dy == 1))
+            {
+                if (0 > ippiFilterSobelHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
+                    return false;
+                buffer.allocate(bufSize);

-            ippiFilterSobelVertSecondBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
-               (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
-                                      ippBorderRepl, 0, (Ipp8u*)(char*)buffer);
-            if(scale != 1)
-               ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
+                if (0 > ippiFilterSobelHorizBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
+                                (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
+                                ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
+                {
+                    return false;
+                }
+                if(scale != 1)
+                    ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
+                return true;
+            }

-            return true;
-         }
+            if((dx == 2) && (dy == 0))
+            {
+                if (0 > ippiFilterSobelVertSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
+                    return false;
+                buffer.allocate(bufSize);

-         if((dx == 0) && (dy == 2))
-         {
-            ippiFilterSobelHorizSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize);
-            buffer.allocate(bufSize);
+                if (0 > ippiFilterSobelVertSecondBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
+                                (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
+                                ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
+                {
+                    return false;
+                }
+                if(scale != 1)
+                    ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
+                return true;
+            }

-            ippiFilterSobelHorizSecondBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
-               (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
-                                      ippBorderRepl, 0, (Ipp8u*)(char*)buffer);
-            if(scale != 1)
-               ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
+            if((dx == 0) && (dy == 2))
+            {
+                if (0 > ippiFilterSobelHorizSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
+                    return false;
+                buffer.allocate(bufSize);

-            return true;
-         }
-      }
-   }
+                if (0 > ippiFilterSobelHorizSecondBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
+                                (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
+                                ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
+                {
+                    return false;
+                }

-   if(ksize <= 0)
-      return IPPDerivScharr(src, dst, ddepth, dx, dy, scale);
+                if(scale != 1)
+                    ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
+                return true;
+            }
+        }
+    }

-   return false;
+    if(ksize <= 0)
+        return IPPDerivScharr(src, dst, ddepth, dx, dy, scale);
+    return false;
 }

 }
@ -433,7 +444,7 @@ void cv::Sobel( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
    if(dx < 3 && dy < 3 && cn == 1 && borderType == BORDER_REPLICATE)
    {
        Mat src = _src.getMat(), dst = _dst.getMat();
-        if(IPPDeriv(src, dst, ddepth, dx, dy, ksize,scale))
+        if (IPPDeriv(src, dst, ddepth, dx, dy, ksize,scale))
            return;
    }
 #endif
@ -495,6 +506,58 @@ void cv::Scharr( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
    sepFilter2D( _src, _dst, ddepth, kx, ky, Point(-1, -1), delta, borderType );
 }

+#ifdef HAVE_OPENCL
+
+namespace cv {
+
+// OpenCL path for the separable-kernel branch of Laplacian.
+//
+// Filters _src twice with sepFilter2D (kd along x / ks along y, then ks along x /
+// kd along y), and hands both intermediate results to the "sumConvert" kernel
+// together with scale and delta, which writes the destination.
+//
+//   kd, ks      - separable kernel pair supplied by the caller
+//   scale/delta - coefficients forwarded to the kernel
+//   borderType  - border mode forwarded to sepFilter2D
+//   depth       - depth of the intermediate sepFilter2D results
+//   ddepth      - destination depth used when building the kernel
+//
+// Returns false when the device lacks double support but the working depth is
+// CV_64F, when the kernel cannot be built, or when the kernel launch fails.
+static bool ocl_Laplacian5(InputArray _src, OutputArray _dst,
+                           const Mat & kd, const Mat & ks, double scale, double delta,
+                           int borderType, int depth, int ddepth)
+{
+    int iscale = cvRound(scale), idelta = cvRound(delta);
+    // floatCoeff is set when scale or delta is not (numerically) an integer,
+    // which forces a floating-point working depth.
+    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0,
+            floatCoeff = std::fabs(delta - idelta) > DBL_EPSILON || std::fabs(scale - iscale) > DBL_EPSILON;
+    int cn = _src.channels(), wdepth = std::max(depth, floatCoeff ? CV_32F : CV_32S), kercn = 1;
+
+    if (!doubleSupport && wdepth == CV_64F)
+        return false;
+
+    char cvt[2][40];
+    ocl::Kernel k("sumConvert", ocl::imgproc::laplacian5_oclsrc,
+                  format("-D srcT=%s -D WT=%s -D dstT=%s -D coeffT=%s -D wdepth=%d "
+                         "-D convertToWT=%s -D convertToDT=%s%s",
+                         ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
+                         ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)),
+                         ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
+                         ocl::typeToStr(wdepth), wdepth,
+                         ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]),
+                         ocl::convertTypeStr(wdepth, ddepth, kercn, cvt[1]),
+                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
+    if (k.empty())
+        return false;
+
+    // Two passes with the kernel roles swapped between the x and y directions.
+    UMat d2x, d2y;
+    sepFilter2D(_src, d2x, depth, kd, ks, Point(-1, -1), 0, borderType);
+    sepFilter2D(_src, d2y, depth, ks, kd, Point(-1, -1), 0, borderType);
+
+    // Presumably the caller has already allocated _dst - TODO confirm.
+    UMat dst = _dst.getUMat();
+
+    ocl::KernelArg d2xarg = ocl::KernelArg::ReadOnlyNoSize(d2x),
+            d2yarg = ocl::KernelArg::ReadOnlyNoSize(d2y),
+            dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);
+
+    // NOTE(review): for wdepth == CV_64F the kernel is built with coeffT set to
+    // the double type string, yet scale/delta are passed as float here - confirm
+    // the kernel signature accepts this.
+    if (wdepth >= CV_32F)
+        k.args(d2xarg, d2yarg, dstarg, (float)scale, (float)delta);
+    else
+        k.args(d2xarg, d2yarg, dstarg, iscale, idelta);
+
+    // Explicit casts: brace-initialising size_t from a non-constant int is a
+    // narrowing conversion, which C++11 list-initialization rejects.
+    size_t globalsize[] = { (size_t)dst.cols * cn / kercn, (size_t)dst.rows };
+    return k.run(2, globalsize, NULL, false);
+}
+
+}
+
+#endif

 void cv::Laplacian( InputArray _src, OutputArray _dst, int ddepth, int ksize,
                    double scale, double delta, int borderType )
@ -531,27 +594,28 @@ void cv::Laplacian( InputArray _src, OutputArray _dst, int ddepth, int ksize,
    }
    else
    {
-        Mat src = _src.getMat(), dst = _dst.getMat();
-        const size_t STRIPE_SIZE = 1 << 14;
-
-        int depth = src.depth();
-        int ktype = std::max(CV_32F, std::max(ddepth, depth));
-        int wdepth = depth == CV_8U && ksize <= 5 ? CV_16S : depth <= CV_32F ? CV_32F : CV_64F;
-        int wtype = CV_MAKETYPE(wdepth, src.channels());
+        int ktype = std::max(CV_32F, std::max(ddepth, sdepth));
+        int wdepth = sdepth == CV_8U && ksize <= 5 ? CV_16S : sdepth <= CV_32F ? CV_32F : CV_64F;
+        int wtype = CV_MAKETYPE(wdepth, cn);
        Mat kd, ks;
        getSobelKernels( kd, ks, 2, 0, ksize, false, ktype );
-        int dtype = CV_MAKETYPE(ddepth, src.channels());

-        int dy0 = std::min(std::max((int)(STRIPE_SIZE/(getElemSize(src.type())*src.cols)), 1), src.rows);
-        Ptr<FilterEngine> fx = createSeparableLinearFilter(src.type(),
+        CV_OCL_RUN(_dst.isUMat(),
+                   ocl_Laplacian5(_src, _dst, kd, ks, scale,
+                                  delta, borderType, wdepth, ddepth))
+
+        const size_t STRIPE_SIZE = 1 << 14;
+        Ptr<FilterEngine> fx = createSeparableLinearFilter(stype,
            wtype, kd, ks, Point(-1,-1), 0, borderType, borderType, Scalar() );
-        Ptr<FilterEngine> fy = createSeparableLinearFilter(src.type(),
+        Ptr<FilterEngine> fy = createSeparableLinearFilter(stype,
            wtype, ks, kd, Point(-1,-1), 0, borderType, borderType, Scalar() );

+        Mat src = _src.getMat(), dst = _dst.getMat();
        int y = fx->start(src), dsty = 0, dy = 0;
        fy->start(src);
        const uchar* sptr = src.data + y*src.step;

+        int dy0 = std::min(std::max((int)(STRIPE_SIZE/(CV_ELEM_SIZE(stype)*src.cols)), 1), src.rows);
        Mat d2x( dy0 + kd.rows - 1, src.cols, wtype );
        Mat d2y( dy0 + kd.rows - 1, src.cols, wtype );

@ -564,7 +628,7 @@ void cv::Laplacian( InputArray _src, OutputArray _dst, int ddepth, int ksize,
                Mat dstripe = dst.rowRange(dsty, dsty + dy);
                d2x.rows = d2y.rows = dy; // modify the headers, which should work
                d2x += d2y;
-                d2x.convertTo( dstripe, dtype, scale, delta );
+                d2x.convertTo( dstripe, ddepth, scale, delta );
            }
        }
    }
--- a/modules/imgproc/src/featureselect.cpp
+++ b/modules/imgproc/src/featureselect.cpp
@ -164,6 +164,12 @@ static bool ocl_goodFeaturesToTrack( InputArray _image, OutputArray _corners,
            return false;

        total = std::min<size_t>(counter.getMat(ACCESS_READ).at<int>(0, 0), possibleCornersCount);
+        if (total == 0)
+        {
+            _corners.release();
+            return true;
+        }
+
        tmpCorners.resize(total);

        Mat mcorners(1, (int)total, CV_32FC2, &tmpCorners[0]);
--- a/modules/imgproc/src/filter.cpp
+++ b/modules/imgproc/src/filter.cpp
@ -47,7 +47,7 @@
                                    Base Image Filter
 \****************************************************************************************/

-#if defined HAVE_IPP && IPP_VERSION_MAJOR*100 + IPP_VERSION_MINOR >= 701
+#if IPP_VERSION_X100 >= 701
 #define USE_IPP_SEP_FILTERS 1
 #else
 #undef USE_IPP_SEP_FILTERS
@ -1420,36 +1420,16 @@ struct RowVec_32f

    int operator()(const uchar* _src, uchar* _dst, int width, int cn) const
    {
+#ifdef USE_IPP_SEP_FILTERS
+        int ret = ippiOperator(_src, _dst, width, cn);
+        if (ret > 0)
+            return ret;
+#endif
        int _ksize = kernel.rows + kernel.cols - 1;
        const float* src0 = (const float*)_src;
        float* dst = (float*)_dst;
        const float* _kx = (const float*)kernel.data;

-#ifdef USE_IPP_SEP_FILTERS
-        IppiSize roisz = { width, 1 };
-        if( (cn == 1 || cn == 3) && width >= _ksize*8 )
-        {
-            if( bufsz < 0 )
-            {
-                if( (cn == 1 && ippiFilterRowBorderPipelineGetBufferSize_32f_C1R(roisz, _ksize, &bufsz) < 0) ||
-                    (cn == 3 && ippiFilterRowBorderPipelineGetBufferSize_32f_C3R(roisz, _ksize, &bufsz) < 0))
-                    return 0;
-            }
-            AutoBuffer<uchar> buf(bufsz + 64);
-            uchar* bufptr = alignPtr((uchar*)buf, 32);
-            int step = (int)(width*sizeof(dst[0])*cn);
-            float borderValue[] = {0.f, 0.f, 0.f};
-            // here is the trick. IPP needs border type and extrapolates the row. We did it already.
-            // So we pass anchor=0 and ignore the right tail of results since they are incorrect there.
-            if( (cn == 1 && ippiFilterRowBorderPipeline_32f_C1R(src0, step, &dst, roisz, _kx, _ksize, 0,
-                                                                ippBorderRepl, borderValue[0], bufptr) < 0) ||
-                (cn == 3 && ippiFilterRowBorderPipeline_32f_C3R(src0, step, &dst, roisz, _kx, _ksize, 0,
-                                                                ippBorderRepl, borderValue, bufptr) < 0))
-                return 0;
-            return width - _ksize + 1;
-        }
-#endif
-
        if( !haveSSE )
            return 0;

@ -1479,7 +1459,38 @@ struct RowVec_32f
    Mat kernel;
    bool haveSSE;
 #ifdef USE_IPP_SEP_FILTERS
+private:
    mutable int bufsz;
+    int ippiOperator(const uchar* _src, uchar* _dst, int width, int cn) const // IPP row-filter path: returns the count of outputs produced, or 0 when the row was not handled
+    {
+        int _ksize = kernel.rows + kernel.cols - 1; // kernel length; assumes kernel is a single row or column -- TODO confirm
+        if ((1 != cn && 3 != cn) || width < _ksize*8) // only 1- or 3-channel rows at least 8 kernel lengths wide take this path
+            return 0;
+
+        const float* src = (const float*)_src;
+        float* dst = (float*)_dst;
+        const float* _kx = (const float*)kernel.data;
+
+        IppiSize roisz = { width, 1 }; // a single row of `width` pixels
+        if( bufsz < 0 ) // negative bufsz is treated as "work-buffer size not queried yet"
+        {
+            if( (cn == 1 && ippiFilterRowBorderPipelineGetBufferSize_32f_C1R(roisz, _ksize, &bufsz) < 0) ||
+                (cn == 3 && ippiFilterRowBorderPipelineGetBufferSize_32f_C3R(roisz, _ksize, &bufsz) < 0))
+                return 0;
+        } // NOTE(review): once bufsz is non-negative it is reused for later calls without re-querying for a different width/cn -- confirm this is safe
+        AutoBuffer<uchar> buf(bufsz + 64); // 64 spare bytes leave room for the 32-byte alignment below
+        uchar* bufptr = alignPtr((uchar*)buf, 32);
+        int step = (int)(width*sizeof(dst[0])*cn); // bytes in one row of width*cn floats
+        float borderValue[] = {0.f, 0.f, 0.f};
+        // here is the trick. IPP needs border type and extrapolates the row. We did it already.
+        // So we pass anchor=0 and ignore the right tail of results since they are incorrect there.
+        if( (cn == 1 && ippiFilterRowBorderPipeline_32f_C1R(src, step, &dst, roisz, _kx, _ksize, 0,
+                                                            ippBorderRepl, borderValue[0], bufptr) < 0) ||
+            (cn == 3 && ippiFilterRowBorderPipeline_32f_C3R(src, step, &dst, roisz, _kx, _ksize, 0,
+                                                            ippBorderRepl, borderValue, bufptr) < 0))
+            return 0; // a negative IPP status is reported as "not handled"
+        return width - _ksize + 1; // the last _ksize-1 outputs (the right tail) are not counted as done
+    }
 #endif
 };

--- a/modules/imgproc/src/imgwarp.cpp
+++ b/modules/imgproc/src/imgwarp.cpp
@ -55,7 +55,7 @@ static IppStatus sts = ippInit();

 namespace cv
 {
-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR*100 + IPP_VERSION_MINOR >= 701)
+#if IPP_VERSION_X100 >= 701
    typedef IppStatus (CV_STDCALL* ippiResizeFunc)(const void*, int, const void*, int, IppiPoint, IppiSize, IppiBorderType, void*, void*, Ipp8u*);
    typedef IppStatus (CV_STDCALL* ippiResizeGetBufferSize)(void*, IppiSize, Ipp32u, int*);
    typedef IppStatus (CV_STDCALL* ippiResizeGetSrcOffset)(void*, IppiPoint, IppiPoint*);
@ -1912,76 +1912,77 @@ static int computeResizeAreaTab( int ssize, int dsize, int cn, double scale, Dec
    getBufferSizeFunc = (ippiResizeGetBufferSize)ippiResizeGetBufferSize_##TYPE;\
    getSrcOffsetFunc =  (ippiResizeGetSrcOffset)ippiResizeGetSrcOffset_##TYPE;

-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR*100 + IPP_VERSION_MINOR >= 701)
+#if IPP_VERSION_X100 >= 701
 class IPPresizeInvoker :
    public ParallelLoopBody
 {
 public:
-    IPPresizeInvoker(Mat &_src, Mat &_dst, double _inv_scale_x, double _inv_scale_y, int _mode, bool *_ok) :
-      ParallelLoopBody(), src(_src), dst(_dst), inv_scale_x(_inv_scale_x), inv_scale_y(_inv_scale_y), mode(_mode), ok(_ok)
-      {
-          *ok = true;
-          IppiSize srcSize, dstSize;
-          int type = src.type();
-          int specSize = 0, initSize = 0;
-          srcSize.width  = src.cols;
-          srcSize.height = src.rows;
-          dstSize.width  = dst.cols;
-          dstSize.height = dst.rows;
-
-          switch (type)
-          {
-          case CV_8UC1:  SET_IPP_RESIZE_PTR(8u,C1);  break;
-          case CV_8UC3:  SET_IPP_RESIZE_PTR(8u,C3);  break;
-          case CV_8UC4:  SET_IPP_RESIZE_PTR(8u,C4);  break;
-          case CV_16UC1: SET_IPP_RESIZE_PTR(16u,C1); break;
-          case CV_16UC3: SET_IPP_RESIZE_PTR(16u,C3); break;
-          case CV_16UC4: SET_IPP_RESIZE_PTR(16u,C4); break;
-          case CV_16SC1: SET_IPP_RESIZE_PTR(16s,C1); break;
-          case CV_16SC3: SET_IPP_RESIZE_PTR(16s,C3); break;
-          case CV_16SC4: SET_IPP_RESIZE_PTR(16s,C4); break;
-          case CV_32FC1: SET_IPP_RESIZE_PTR(32f,C1); break;
-          case CV_32FC3: SET_IPP_RESIZE_PTR(32f,C3); break;
-          case CV_32FC4: SET_IPP_RESIZE_PTR(32f,C4); break;
-          case CV_64FC1: SET_IPP_RESIZE_LINEAR_FUNC_64_PTR(64f,C1); break;
-          case CV_64FC3: SET_IPP_RESIZE_LINEAR_FUNC_64_PTR(64f,C3); break;
-          case CV_64FC4: SET_IPP_RESIZE_LINEAR_FUNC_64_PTR(64f,C4); break;
-          default: { *ok = false; return;} break;
-          }
-      }
+    IPPresizeInvoker(const Mat & _src, Mat & _dst, double _inv_scale_x, double _inv_scale_y, int _mode, bool *_ok) :
+        ParallelLoopBody(), src(_src), dst(_dst), inv_scale_x(_inv_scale_x), inv_scale_y(_inv_scale_y), mode(_mode), ok(_ok)
+    {
+        *ok = true;
+        IppiSize srcSize, dstSize;
+        int type = src.type();
+        int specSize = 0, initSize = 0;
+        srcSize.width  = src.cols;
+        srcSize.height = src.rows;
+        dstSize.width  = dst.cols;
+        dstSize.height = dst.rows;

-      ~IPPresizeInvoker()
-      {
-      }
+        switch (type)
+        {
+            case CV_8UC1:  SET_IPP_RESIZE_PTR(8u,C1);  break;
+            case CV_8UC3:  SET_IPP_RESIZE_PTR(8u,C3);  break;
+            case CV_8UC4:  SET_IPP_RESIZE_PTR(8u,C4);  break;
+            case CV_16UC1: SET_IPP_RESIZE_PTR(16u,C1); break;
+            case CV_16UC3: SET_IPP_RESIZE_PTR(16u,C3); break;
+            case CV_16UC4: SET_IPP_RESIZE_PTR(16u,C4); break;
+            case CV_16SC1: SET_IPP_RESIZE_PTR(16s,C1); break;
+            case CV_16SC3: SET_IPP_RESIZE_PTR(16s,C3); break;
+            case CV_16SC4: SET_IPP_RESIZE_PTR(16s,C4); break;
+            case CV_32FC1: SET_IPP_RESIZE_PTR(32f,C1); break;
+            case CV_32FC3: SET_IPP_RESIZE_PTR(32f,C3); break;
+            case CV_32FC4: SET_IPP_RESIZE_PTR(32f,C4); break;
+            case CV_64FC1: SET_IPP_RESIZE_LINEAR_FUNC_64_PTR(64f,C1); break;
+            case CV_64FC3: SET_IPP_RESIZE_LINEAR_FUNC_64_PTR(64f,C3); break;
+            case CV_64FC4: SET_IPP_RESIZE_LINEAR_FUNC_64_PTR(64f,C4); break;
+            default: { *ok = false; return; } break;
+        }
+    }

-      virtual void operator() (const Range& range) const
-      {
-          if (*ok == false) return;
+    ~IPPresizeInvoker()
+    {
+    }

-          int cn = src.channels();
-          int dsty = min(cvRound(range.start * inv_scale_y), dst.rows);
-          int dstwidth  = min(cvRound(src.cols * inv_scale_x), dst.cols);
-          int dstheight = min(cvRound(range.end * inv_scale_y), dst.rows);
+    virtual void operator() (const Range& range) const
+    {
+        if (*ok == false)
+          return;
+
+        int cn = src.channels();
+        int dsty = min(cvRound(range.start * inv_scale_y), dst.rows);
+        int dstwidth  = min(cvRound(src.cols * inv_scale_x), dst.cols);
+        int dstheight = min(cvRound(range.end * inv_scale_y), dst.rows);

-          IppiPoint dstOffset = { 0, dsty }, srcOffset = {0, 0};
-          IppiSize  dstSize   = { dstwidth, dstheight - dsty };
-          int bufsize = 0, itemSize = (int)src.elemSize1();
+        IppiPoint dstOffset = { 0, dsty }, srcOffset = {0, 0};
+        IppiSize  dstSize   = { dstwidth, dstheight - dsty };
+        int bufsize = 0, itemSize = (int)src.elemSize1();

-          CHECK_IPP_STATUS(getBufferSizeFunc(pSpec, dstSize, cn, &bufsize));
-          CHECK_IPP_STATUS(getSrcOffsetFunc(pSpec, dstOffset, &srcOffset));
+        CHECK_IPP_STATUS(getBufferSizeFunc(pSpec, dstSize, cn, &bufsize));
+        CHECK_IPP_STATUS(getSrcOffsetFunc(pSpec, dstOffset, &srcOffset));

-          Ipp8u* pSrc = (Ipp8u*)src.data + (int)src.step[0] * srcOffset.y + srcOffset.x * cn * itemSize;
-          Ipp8u* pDst = (Ipp8u*)dst.data + (int)dst.step[0] * dstOffset.y + dstOffset.x * cn * itemSize;
+        const Ipp8u* pSrc = (const Ipp8u*)src.data + (int)src.step[0] * srcOffset.y + srcOffset.x * cn * itemSize;
+        Ipp8u* pDst = (Ipp8u*)dst.data + (int)dst.step[0] * dstOffset.y + dstOffset.x * cn * itemSize;

-          AutoBuffer<uchar> buf(bufsize + 64);
-          uchar* bufptr = alignPtr((uchar*)buf, 32);
+        AutoBuffer<uchar> buf(bufsize + 64);
+        uchar* bufptr = alignPtr((uchar*)buf, 32);

-          if( func( pSrc, (int)src.step[0], pDst, (int)dst.step[0], dstOffset, dstSize, ippBorderRepl, 0, pSpec, bufptr ) < 0 )
-              *ok = false;
-      }
+        if( func( pSrc, (int)src.step[0], pDst, (int)dst.step[0], dstOffset, dstSize, ippBorderRepl, 0, pSpec, bufptr ) < 0 )
+            *ok = false;
+    }
 private:
-    Mat &src;
-    Mat &dst;
+    const Mat & src;
+    Mat & dst;
    double inv_scale_x;
    double inv_scale_y;
    void *pSpec;
@ -1993,12 +1994,13 @@ private:
    bool *ok;
    const IPPresizeInvoker& operator= (const IPPresizeInvoker&);
 };
+
 #endif

 #ifdef HAVE_OPENCL

 static void ocl_computeResizeAreaTabs(int ssize, int dsize, double scale, int * const map_tab,
-                                          float * const alpha_tab, int * const ofs_tab)
+                                      float * const alpha_tab, int * const ofs_tab)
 {
    int k = 0, dx = 0;
    for ( ; dx < dsize; dx++)
@ -2049,8 +2051,16 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
 {
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

-    double inv_fx = 1. / fx, inv_fy = 1. / fy;
+    double inv_fx = 1.0 / fx, inv_fy = 1.0 / fy;
    float inv_fxf = (float)inv_fx, inv_fyf = (float)inv_fy;
+    // Integer roundings of the per-axis scale factors. Each axis has to be rounded
+    // from its own factor: iscale_y comes from inv_fy (it was mistakenly taken from
+    // inv_fx, which made is_area_fast false whenever the two integer factors differ).
+    int iscale_x = saturate_cast<int>(inv_fx), iscale_y = saturate_cast<int>(inv_fy);
+    // True only when both scale factors are numerically integral; selects the
+    // INTER_AREA_FAST kernel variant further down.
+    bool is_area_fast = std::abs(inv_fx - iscale_x) < DBL_EPSILON &&
+        std::abs(inv_fy - iscale_y) < DBL_EPSILON;
+
+    // in case of scale_x && scale_y is equal to 2
+    // INTER_AREA (fast) also is equal to INTER_LINEAR
+    if( interpolation == INTER_LINEAR && is_area_fast && iscale_x == 2 && iscale_y == 2 )
+        /*interpolation = INTER_AREA*/(void)0; // INTER_AREA is slower

    if( !(cn <= 4 &&
           (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR ||
@ -2061,39 +2071,105 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
    _dst.create(dsize, type);
    UMat dst = _dst.getUMat();

+    Size ssize = src.size();
    ocl::Kernel k;
    size_t globalsize[] = { dst.cols, dst.rows };

    if (interpolation == INTER_LINEAR)
    {
-        int wdepth = std::max(depth, CV_32S);
-        int wtype = CV_MAKETYPE(wdepth, cn);
        char buf[2][32];
-        k.create("resizeLN", ocl::imgproc::resize_oclsrc,
-                 format("-D INTER_LINEAR -D depth=%d -D PIXTYPE=%s -D PIXTYPE1=%s "
-                        "-D WORKTYPE=%s -D convertToWT=%s -D convertToDT=%s -D cn=%d",
-                        depth, ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype),
-                        ocl::convertTypeStr(depth, wdepth, cn, buf[0]),
-                        ocl::convertTypeStr(wdepth, depth, cn, buf[1]),
-                        cn));
+
+        // integer path is slower because of CPU part, so it's disabled
+        if (depth == CV_8U && ((void)0, 0))
+        {
+            AutoBuffer<uchar> _buffer((dsize.width + dsize.height)*(sizeof(int) + sizeof(short)*2));
+            int* xofs = (int*)(uchar*)_buffer, * yofs = xofs + dsize.width;
+            short* ialpha = (short*)(yofs + dsize.height), * ibeta = ialpha + dsize.width*2;
+            float fxx, fyy;
+            int sx, sy;
+
+            for (int dx = 0; dx < dsize.width; dx++)
+            {
+                fxx = (float)((dx+0.5)*inv_fx - 0.5);
+                sx = cvFloor(fxx);
+                fxx -= sx;
+
+                if (sx < 0)
+                    fxx = 0, sx = 0;
+
+                if (sx >= ssize.width-1)
+                    fxx = 0, sx = ssize.width-1;
+
+                xofs[dx] = sx;
+                ialpha[dx*2 + 0] = saturate_cast<short>((1.f - fxx) * INTER_RESIZE_COEF_SCALE);
+                ialpha[dx*2 + 1] = saturate_cast<short>(fxx         * INTER_RESIZE_COEF_SCALE);
+            }
+
+            for (int dy = 0; dy < dsize.height; dy++)
+            {
+                fyy = (float)((dy+0.5)*inv_fy - 0.5);
+                sy = cvFloor(fyy);
+                fyy -= sy;
+
+                yofs[dy] = sy;
+                ibeta[dy*2 + 0] = saturate_cast<short>((1.f - fyy) * INTER_RESIZE_COEF_SCALE);
+                ibeta[dy*2 + 1] = saturate_cast<short>(fyy         * INTER_RESIZE_COEF_SCALE);
+            }
+
+            int wdepth = std::max(depth, CV_32S), wtype = CV_MAKETYPE(wdepth, cn);
+            UMat coeffs;
+            Mat(1, static_cast<int>(_buffer.size()), CV_8UC1, (uchar *)_buffer).copyTo(coeffs);
+
+            k.create("resizeLN", ocl::imgproc::resize_oclsrc,
+                     format("-D INTER_LINEAR_INTEGER -D depth=%d -D T=%s -D T1=%s "
+                            "-D WT=%s -D convertToWT=%s -D convertToDT=%s -D cn=%d "
+                            "-D INTER_RESIZE_COEF_BITS=%d",
+                            depth, ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype),
+                            ocl::convertTypeStr(depth, wdepth, cn, buf[0]),
+                            ocl::convertTypeStr(wdepth, depth, cn, buf[1]),
+                            cn, INTER_RESIZE_COEF_BITS));
+            if (k.empty())
+                return false;
+
+            k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst),
+                   ocl::KernelArg::PtrReadOnly(coeffs));
+        }
+        else
+        {
+            int wdepth = std::max(depth, CV_32S), wtype = CV_MAKETYPE(wdepth, cn);
+            k.create("resizeLN", ocl::imgproc::resize_oclsrc,
+                     format("-D INTER_LINEAR -D depth=%d -D T=%s -D T1=%s "
+                            "-D WT=%s -D convertToWT=%s -D convertToDT=%s -D cn=%d "
+                            "-D INTER_RESIZE_COEF_BITS=%d",
+                            depth, ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype),
+                            ocl::convertTypeStr(depth, wdepth, cn, buf[0]),
+                            ocl::convertTypeStr(wdepth, depth, cn, buf[1]),
+                            cn, INTER_RESIZE_COEF_BITS));
+            if (k.empty())
+                return false;
+
+            k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst),
+                   (float)inv_fx, (float)inv_fy);
+        }
    }
    else if (interpolation == INTER_NEAREST)
    {
        k.create("resizeNN", ocl::imgproc::resize_oclsrc,
-                 format("-D INTER_NEAREST -D PIXTYPE=%s -D PIXTYPE1=%s -D cn=%d",
+                 format("-D INTER_NEAREST -D T=%s -D T1=%s -D cn=%d",
                        ocl::memopTypeToStr(type), ocl::memopTypeToStr(depth), cn));
+        if (k.empty())
+            return false;
+
+        k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst),
+               (float)inv_fx, (float)inv_fy);
    }
    else if (interpolation == INTER_AREA)
    {
-        int iscale_x = saturate_cast<int>(inv_fx);
-        int iscale_y = saturate_cast<int>(inv_fy);
-        bool is_area_fast = std::abs(inv_fx - iscale_x) < DBL_EPSILON &&
-                        std::abs(inv_fy - iscale_y) < DBL_EPSILON;
        int wdepth = std::max(depth, is_area_fast ? CV_32S : CV_32F);
        int wtype = CV_MAKE_TYPE(wdepth, cn);

        char cvt[2][40];
-        String buildOption = format("-D INTER_AREA -D PIXTYPE=%s -D PIXTYPE1=%s -D WTV=%s -D convertToWTV=%s -D cn=%d",
+        String buildOption = format("-D INTER_AREA -D T=%s -D T1=%s -D WTV=%s -D convertToWTV=%s -D cn=%d",
                                    ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype),
                                    ocl::convertTypeStr(depth, wdepth, cn, cvt[0]), cn);

@ -2103,7 +2179,7 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
        if (is_area_fast)
        {
            int wdepth2 = std::max(CV_32F, depth), wtype2 = CV_MAKE_TYPE(wdepth2, cn);
-            buildOption = buildOption + format(" -D convertToPIXTYPE=%s -D WT2V=%s -D convertToWT2V=%s -D INTER_AREA_FAST"
+            buildOption = buildOption + format(" -D convertToT=%s -D WT2V=%s -D convertToWT2V=%s -D INTER_AREA_FAST"
                                               " -D XSCALE=%d -D YSCALE=%d -D SCALE=%ff",
                                               ocl::convertTypeStr(wdepth2, depth, cn, cvt[0]),
                                               ocl::typeToStr(wtype2), ocl::convertTypeStr(wdepth, wdepth2, cn, cvt[1]),
@ -2126,12 +2202,11 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
        }
        else
        {
-            buildOption = buildOption + format(" -D convertToPIXTYPE=%s", ocl::convertTypeStr(wdepth, depth, cn, cvt[0]));
+            buildOption = buildOption + format(" -D convertToT=%s", ocl::convertTypeStr(wdepth, depth, cn, cvt[0]));
            k.create("resizeAREA", ocl::imgproc::resize_oclsrc, buildOption);
            if (k.empty())
                return false;

-            Size ssize = src.size();
            int xytab_size = (ssize.width + ssize.height) << 1;
            int tabofs_size = dsize.height + dsize.width + 2;

@ -2161,11 +2236,6 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
        return k.run(2, globalsize, NULL, false);
    }

-    if( k.empty() )
-        return false;
-    k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst),
-           (float)inv_fx, (float)inv_fy);
-
    return k.run(2, globalsize, 0, false);
 }

@ -2314,7 +2384,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
    double scale_x = 1./inv_scale_x, scale_y = 1./inv_scale_y;
    int k, sx, sy, dx, dy;

-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR*100 + IPP_VERSION_MINOR >= 701)
+#if IPP_VERSION_X100 >= 701
 #define IPP_RESIZE_EPS    1.e-10

    double ex = fabs((double)dsize.width/src.cols  - inv_scale_x)/inv_scale_x;
@ -3954,25 +4024,25 @@ public:
          *ok = true;
      }

-      virtual void operator() (const Range& range) const
-      {
-          IppiSize srcsize = { src.cols, src.rows };
-          IppiRect srcroi = { 0, 0, src.cols, src.rows };
-          IppiRect dstroi = { 0, range.start, dst.cols, range.end - range.start };
-          int cnn = src.channels();
-          if( borderType == BORDER_CONSTANT )
-          {
-              IppiSize setSize = { dst.cols, range.end - range.start };
-              void *dataPointer = dst.data + dst.step[0] * range.start;
-              if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) )
-              {
-                  *ok = false;
-                  return;
-              }
-          }
-          if( func( src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode ) < 0) ////Aug 2013: problem in IPP 7.1, 8.0 : sometimes function return ippStsCoeffErr
-              *ok = false;
-      }
+    virtual void operator() (const Range& range) const // handles destination rows [range.start, range.end); failures are reported via *ok
+    {
+        IppiSize srcsize = { src.cols, src.rows };
+        IppiRect srcroi = { 0, 0, src.cols, src.rows }; // the whole source image is the input ROI
+        IppiRect dstroi = { 0, range.start, dst.cols, range.end - range.start }; // full-width destination stripe for this range
+        int cnn = src.channels();
+        if( borderType == BORDER_CONSTANT )
+        {
+            IppiSize setSize = { dst.cols, range.end - range.start };
+            void *dataPointer = dst.data + dst.step[0] * range.start; // start of row range.start in dst
+            if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) ) // presumably pre-fills the stripe with borderValue -- confirm against IPPSet
+            {
+                *ok = false;
+                return;
+            }
+        }
+        if( func( src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode ) < 0) ////Aug 2013: problem in IPP 7.1, 8.0 : sometimes function return ippStsCoeffErr
+            *ok = false; // negative IPP status: flag failure for the caller
+    }
 private:
    Mat &src;
    Mat &dst;
@ -4297,26 +4367,26 @@ public:
          *ok = true;
      }

-      virtual void operator() (const Range& range) const
-      {
-          IppiSize srcsize = {src.cols, src.rows};
-          IppiRect srcroi = {0, 0, src.cols, src.rows};
-          IppiRect dstroi = {0, range.start, dst.cols, range.end - range.start};
-          int cnn = src.channels();
-
-          if( borderType == BORDER_CONSTANT )
-          {
-              IppiSize setSize = {dst.cols, range.end - range.start};
-              void *dataPointer = dst.data + dst.step[0] * range.start;
-              if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) )
-              {
-                  *ok = false;
-                  return;
-              }
-          }
-          if( func(src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode) < 0)
-              *ok = false;
-      }
+    virtual void operator() (const Range& range) const // handles destination rows [range.start, range.end); failures are reported via *ok
+    {
+        IppiSize srcsize = {src.cols, src.rows};
+        IppiRect srcroi = {0, 0, src.cols, src.rows}; // the whole source image is the input ROI
+        IppiRect dstroi = {0, range.start, dst.cols, range.end - range.start}; // full-width destination stripe for this range
+        int cnn = src.channels();
+
+        if( borderType == BORDER_CONSTANT )
+        {
+            IppiSize setSize = {dst.cols, range.end - range.start};
+            void *dataPointer = dst.data + dst.step[0] * range.start; // start of row range.start in dst
+            if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) ) // presumably pre-fills the stripe with borderValue -- confirm against IPPSet
+            {
+                *ok = false;
+                return;
+            }
+        }
+        if( func(src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode) < 0)
+            *ok = false; // negative IPP status: flag failure for the caller
+    }
 private:
    Mat &src;
    Mat &dst;
--- a/modules/imgproc/src/morph.cpp
+++ b/modules/imgproc/src/morph.cpp
@ -1136,80 +1136,128 @@ private:
    Scalar borderValue;
 };

-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+#if IPP_VERSION_X100 >= 801
 static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kernel,
                              const Size& ksize, const Point &anchor, bool rectKernel)
 {
    int type = src.type();
    const Mat* _src = &src;
    Mat temp;
-    if( src.data == dst.data )
+    if (src.data == dst.data)
    {
        src.copyTo(temp);
        _src = &temp;
    }
-    //DEPRECATED. Allocates and initializes morphology state structure for erosion or dilation operation.
-    typedef IppStatus (CV_STDCALL* ippiMorphologyInitAllocFunc)(int, const void*, IppiSize, IppiPoint, IppiMorphState **);
-    typedef IppStatus (CV_STDCALL* ippiMorphologyBorderReplicateFunc)(const void*, int, void *, int,
-                                                                      IppiSize, IppiBorderType, IppiMorphState *);
-    typedef IppStatus (CV_STDCALL* ippiFilterMinMaxGetBufferSizeFunc)(int, IppiSize, int*);
-    typedef IppStatus (CV_STDCALL* ippiFilterMinMaxBorderReplicateFunc)(const void*, int, void*, int,
-                                                                        IppiSize, IppiSize, IppiPoint, void*);
-
-    ippiMorphologyInitAllocFunc initAllocFunc = 0;
-    ippiMorphologyBorderReplicateFunc morphFunc = 0;
-    ippiFilterMinMaxGetBufferSizeFunc getBufSizeFunc = 0;
-    ippiFilterMinMaxBorderReplicateFunc morphRectFunc = 0;
-
-    #define IPP_MORPH_CASE(type, flavor) \
-    case type: \
-        initAllocFunc = (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_##flavor; \
-        morphFunc = op == MORPH_ERODE ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_##flavor : \
-                                        (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_##flavor; \
-        getBufSizeFunc = (ippiFilterMinMaxGetBufferSizeFunc)ippiFilterMinGetBufferSize_##flavor; \
-        morphRectFunc = op == MORPH_ERODE ? (ippiFilterMinMaxBorderReplicateFunc)ippiFilterMinBorderReplicate_##flavor : \
-                                            (ippiFilterMinMaxBorderReplicateFunc)ippiFilterMaxBorderReplicate_##flavor; \
-        break
-
-    switch( type )
-    {
-    IPP_MORPH_CASE(CV_8UC1, 8u_C1R);
-    IPP_MORPH_CASE(CV_8UC3, 8u_C3R);
-    IPP_MORPH_CASE(CV_8UC4, 8u_C4R);
-    IPP_MORPH_CASE(CV_32FC1, 32f_C1R);
-    IPP_MORPH_CASE(CV_32FC3, 32f_C3R);
-    IPP_MORPH_CASE(CV_32FC4, 32f_C4R);
-    default:
-        return false;
-    }
-    #undef IPP_MORPH_CASE

    IppiSize roiSize = {src.cols, src.rows};
    IppiSize kernelSize = {ksize.width, ksize.height};
-    IppiPoint point = {anchor.x, anchor.y};

-    if( !rectKernel && morphFunc && initAllocFunc )
+    if (!rectKernel)
    {
-        IppiMorphState* pState;
-        if( initAllocFunc( roiSize.width, kernel.data, kernelSize, point, &pState ) < 0 )
+#if 1
+        if (((kernel.cols - 1) / 2 != anchor.x) || ((kernel.rows - 1) / 2 != anchor.y))
+            return false;
+        #define IPP_MORPH_CASE(cvtype, flavor, data_type) \
+        case cvtype: \
+            {\
+                int specSize = 0, bufferSize = 0;\
+                if (0 > ippiMorphologyBorderGetSize_##flavor(roiSize.width, kernelSize, &specSize, &bufferSize))\
+                    return false;\
+                IppiMorphState *pSpec = (IppiMorphState*)ippMalloc(specSize);\
+                Ipp8u *pBuffer = (Ipp8u*)ippMalloc(bufferSize);\
+                if (0 > ippiMorphologyBorderInit_##flavor(roiSize.width, kernel.data, kernelSize, pSpec, pBuffer))\
+                {\
+                    ippFree(pBuffer);\
+                    ippFree(pSpec);\
+                    return false;\
+                }\
+                bool ok = false;\
+                if (op == MORPH_ERODE)\
+                    ok = (0 <= ippiErodeBorder_##flavor((Ipp##data_type *)_src->data, (int)_src->step[0], (Ipp##data_type *)dst.data, (int)dst.step[0],\
+                                            roiSize, ippBorderRepl, 0, pSpec, pBuffer));\
+                else\
+                    ok = (0 <= ippiDilateBorder_##flavor((Ipp##data_type *)_src->data, (int)_src->step[0], (Ipp##data_type *)dst.data, (int)dst.step[0],\
+                                            roiSize, ippBorderRepl, 0, pSpec, pBuffer));\
+                ippFree(pBuffer);\
+                ippFree(pSpec);\
+                return ok;\
+            }\
+            break;
+#else
+        IppiPoint point = {anchor.x, anchor.y};
+        // Disabled alternative: this variant passes an explicit anchor (`point`), so it can be
+        // used when the anchor is not at the center of the kernel. It is kept out of the build
+        // because the IPP entry points it calls (ippiMorphologyGetSize_, ippiErodeBorderReplicate_
+        // and ippiDilateBorderReplicate_) are deprecated.
+        // Each expanded case queries the state size, initialises the morphology state, runs the
+        // erode or dilate call selected by `op`, frees the state and returns whether IPP succeeded.
+        #define IPP_MORPH_CASE(cvtype, flavor, data_type) \
+        case cvtype: \
+            {\
+                int specSize = 0;\
+                if (0 > ippiMorphologyGetSize_##flavor( roiSize.width, kernel.data, kernelSize, &specSize))\
+                    return false;\
+                bool ok = false;\
+                IppiMorphState* pState = (IppiMorphState*)ippMalloc(specSize);\
+                if (ippiMorphologyInit_##flavor(roiSize.width, kernel.data, kernelSize, point, pState) >= 0)\
+                {\
+                    if (op == MORPH_ERODE)\
+                        ok = ippiErodeBorderReplicate_##flavor((Ipp##data_type *)_src->data, (int)_src->step[0],\
+                            (Ipp##data_type *)dst.data, (int)dst.step[0],\
+                            roiSize, ippBorderRepl, pState ) >= 0;\
+                    else\
+                        ok = ippiDilateBorderReplicate_##flavor((Ipp##data_type *)_src->data, (int)_src->step[0],\
+                            (Ipp##data_type *)dst.data, (int)dst.step[0],\
+                            roiSize, ippBorderRepl, pState ) >= 0;\
+                }\
+                ippFree(pState);\
+                return ok;\
+            }\
+            break;
+#endif
+        switch (type)
+        {
+        IPP_MORPH_CASE(CV_8UC1, 8u_C1R, 8u);
+        IPP_MORPH_CASE(CV_8UC3, 8u_C3R, 8u);
+        IPP_MORPH_CASE(CV_8UC4, 8u_C4R, 8u);
+        IPP_MORPH_CASE(CV_32FC1, 32f_C1R, 32f);
+        IPP_MORPH_CASE(CV_32FC3, 32f_C3R, 32f);
+        IPP_MORPH_CASE(CV_32FC4, 32f_C4R, 32f);
+        default:
            return false;
-        bool is_ok = morphFunc( _src->data, (int)_src->step[0],
-                               dst.data, (int)dst.step[0],
-                               roiSize, ippBorderRepl, pState ) >= 0;
-        ippiMorphologyFree(pState);
-        return is_ok;
+        }
+
+        #undef IPP_MORPH_CASE
    }
-    else if( rectKernel && morphRectFunc && getBufSizeFunc )
+    else
    {
-        int bufSize = 0;
-        if( getBufSizeFunc( src.cols, kernelSize, &bufSize) < 0 )
+        IppiPoint point = {anchor.x, anchor.y};
+
+        #define IPP_MORPH_CASE(cvtype, flavor, data_type) \
+        case cvtype: \
+            {\
+                int bufSize = 0;\
+                if (0 > ippiFilterMinGetBufferSize_##flavor(src.cols, kernelSize, &bufSize))\
+                    return false;\
+                AutoBuffer<uchar> buf(bufSize + 64);\
+                uchar* buffer = alignPtr((uchar*)buf, 32);\
+                if (op == MORPH_ERODE)\
+                    return (0 <= ippiFilterMinBorderReplicate_##flavor((Ipp##data_type *)_src->data, (int)_src->step[0], (Ipp##data_type *)dst.data, (int)dst.step[0], roiSize, kernelSize, point, buffer));\
+                return (0 <= ippiFilterMaxBorderReplicate_##flavor((Ipp##data_type *)_src->data, (int)_src->step[0], (Ipp##data_type *)dst.data, (int)dst.step[0], roiSize, kernelSize, point, buffer));\
+            }\
+            break;
+
+        switch (type)
+        {
+        IPP_MORPH_CASE(CV_8UC1, 8u_C1R, 8u);
+        IPP_MORPH_CASE(CV_8UC3, 8u_C3R, 8u);
+        IPP_MORPH_CASE(CV_8UC4, 8u_C4R, 8u);
+        IPP_MORPH_CASE(CV_32FC1, 32f_C1R, 32f);
+        IPP_MORPH_CASE(CV_32FC3, 32f_C3R, 32f);
+        IPP_MORPH_CASE(CV_32FC4, 32f_C4R, 32f);
+        default:
            return false;
-        AutoBuffer<uchar> buf(bufSize + 64);
-        uchar* buffer = alignPtr((uchar*)buf, 32);
-        return morphRectFunc(_src->data, (int)_src->step[0], dst.data, (int)dst.step[0],
-                             roiSize, kernelSize, point, buffer) >= 0;
+        }
+
+        #undef IPP_MORPH_CASE
    }
-    return false;
 }

 static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
@ -1411,7 +1459,7 @@ static void morphOp( int op, InputArray _src, OutputArray _dst,
    Size ksize = kernel.data ? kernel.size() : Size(3,3);
    anchor = normalizeAnchor(anchor, ksize);

-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+#if IPP_VERSION_X100 >= 801
    if( IPPMorphOp(op, _src, _dst, kernel, anchor, iterations, borderType, borderValue) )
        return;
 #endif
--- a/modules/imgproc/src/opencl/laplacian5.cl
+++ b/modules/imgproc/src/opencl/laplacian5.cl
@ -0,0 +1,34 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2014, Itseez, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#define noconvert
+// sumConvert: dst(y,x) = convertToDT(scale * (src1(y,x) + src2(y,x)) + delta). srcT, dstT, WT, coeffT, wdepth and the convertTo* macros are not defined in this file (presumably supplied as build options - confirm in the host code).
+__kernel void sumConvert(__global const uchar * src1ptr, int src1_step, int src1_offset,
+                         __global const uchar * src2ptr, int src2_step, int src2_offset,
+                         __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
+                         coeffT scale, coeffT delta)
+{
+    int x = get_global_id(0); // destination column handled by this work-item
+    int y = get_global_id(1); // destination row handled by this work-item
+    // work-items that fall outside the dst_rows x dst_cols area do nothing
+    if (y < dst_rows && x < dst_cols)
+    {
+        int src1_index = mad24(y, src1_step, mad24(x, (int)sizeof(srcT), src1_offset));
+        int src2_index = mad24(y, src2_step, mad24(x, (int)sizeof(srcT), src2_offset));
+        int dst_index = mad24(y, dst_step, mad24(x, (int)sizeof(dstT), dst_offset));
+        // the *ptr arguments are uchar pointers, so the indices above are byte offsets (y * step + x * element size + offset)
+        __global const srcT * src1 = (__global const srcT *)(src1ptr + src1_index);
+        __global const srcT * src2 = (__global const srcT *)(src2ptr + src2_index);
+        __global dstT * dst = (__global dstT *)(dstptr + dst_index);
+        // wdepth <= 4 selects the integer mad24 built-in, any other wdepth the floating-point mad
+#if wdepth <= 4
+        dst[0] = convertToDT( mad24((WT)(scale), convertToWT(src1[0]) + convertToWT(src2[0]), (WT)(delta)) );
+#else
+        dst[0] = convertToDT( mad((WT)(scale), convertToWT(src1[0]) + convertToWT(src2[0]), (WT)(delta)) );
+#endif
+    }
+}
--- a/modules/imgproc/src/opencl/resize.cl
+++ b/modules/imgproc/src/opencl/resize.cl
@ -43,110 +43,140 @@
 //
 //M*/

-#if defined DOUBLE_SUPPORT
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
+#endif

-#define INTER_RESIZE_COEF_BITS 11
 #define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS)
 #define CAST_BITS (INTER_RESIZE_COEF_BITS << 1)
 #define INC(x,l) min(x+1,l-1)

-
-#define noconvert(x) (x)
+#define noconvert

 #if cn != 3
-#define loadpix(addr)  *(__global const PIXTYPE*)(addr)
-#define storepix(val, addr)  *(__global PIXTYPE*)(addr) = val
-#define PIXSIZE ((int)sizeof(PIXTYPE))
+#define loadpix(addr)  *(__global const T *)(addr)
+#define storepix(val, addr)  *(__global T *)(addr) = val
+#define TSIZE (int)sizeof(T)
 #else
-#define loadpix(addr)  vload3(0, (__global const PIXTYPE1*)(addr))
-#define storepix(val, addr) vstore3(val, 0, (__global PIXTYPE1*)(addr))
-#define PIXSIZE ((int)sizeof(PIXTYPE1)*3)
+#define loadpix(addr)  vload3(0, (__global const T1 *)(addr))
+#define storepix(val, addr) vstore3(val, 0, (__global T1 *)(addr))
+#define TSIZE (int)sizeof(T1)*cn
 #endif

-#if defined INTER_LINEAR
+#ifdef INTER_LINEAR_INTEGER

-__kernel void resizeLN(__global const uchar* srcptr, int srcstep, int srcoffset,
-                       int srcrows, int srccols,
-                       __global uchar* dstptr, int dststep, int dstoffset,
-                       int dstrows, int dstcols,
-                       float ifx, float ify)
+__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
+                       __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
+                       __global const uchar * buffer)
 {
    int dx = get_global_id(0);
    int dy = get_global_id(1);

-    float sx = ((dx+0.5f) * ifx - 0.5f), sy = ((dy+0.5f) * ify - 0.5f);
-    int x = floor(sx), y = floor(sy);
-
-    float u = sx - x, v = sy - y;
+    if (dx < dst_cols && dy < dst_rows)
+    {
+        __global const int * xofs = (__global const int *)(buffer), * yofs = xofs + dst_cols;
+        __global const short * ialpha = (__global const short *)(yofs + dst_rows);
+        __global const short * ibeta = ialpha + ((dst_cols + dy) << 1);
+        ialpha += dx << 1;
+
+        int sx0 = xofs[dx], sy0 = clamp(yofs[dy], 0, src_rows - 1),
+        sy1 = clamp(yofs[dy] + 1, 0, src_rows - 1);
+        short a0 = ialpha[0], a1 = ialpha[1];
+        short b0 = ibeta[0], b1 = ibeta[1];
+
+        int src_index0 = mad24(sy0, src_step, mad24(sx0, TSIZE, src_offset)),
+        src_index1 = mad24(sy1, src_step, mad24(sx0, TSIZE, src_offset));
+        WT data0 = convertToWT(loadpix(srcptr + src_index0));
+        WT data1 = convertToWT(loadpix(srcptr + src_index0 + TSIZE));
+        WT data2 = convertToWT(loadpix(srcptr + src_index1));
+        WT data3 = convertToWT(loadpix(srcptr + src_index1 + TSIZE));
+
+        WT val = ( (((data0 * a0 + data1 * a1) >> 4) * b0) >> 16) +
+                 ( (((data2 * a0 + data3 * a1) >> 4) * b1) >> 16);
+
+        storepix(convertToDT((val + 2) >> 2),
+                 dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
+    }
+}

-    if ( x<0 ) x=0,u=0;
-    if ( x>=srccols ) x=srccols-1,u=0;
-    if ( y<0 ) y=0,v=0;
-    if ( y>=srcrows ) y=srcrows-1,v=0;
+#elif defined INTER_LINEAR

-    int y_ = INC(y,srcrows);
-    int x_ = INC(x,srccols);
+__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
+                       __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
+                       float ifx, float ify)
+{
+    int dx = get_global_id(0);
+    int dy = get_global_id(1);

-#if depth <= 4
+    if (dx < dst_cols && dy < dst_rows)
+    {
+        float sx = ((dx+0.5f) * ifx - 0.5f), sy = ((dy+0.5f) * ify - 0.5f);
+        int x = floor(sx), y = floor(sy);

-    u = u * INTER_RESIZE_COEF_SCALE;
-    v = v * INTER_RESIZE_COEF_SCALE;
+        float u = sx - x, v = sy - y;

-    int U = rint(u);
-    int V = rint(v);
-    int U1 = rint(INTER_RESIZE_COEF_SCALE - u);
-    int V1 = rint(INTER_RESIZE_COEF_SCALE - v);
+        if ( x<0 ) x=0,u=0;
+        if ( x>=src_cols ) x=src_cols-1,u=0;
+        if ( y<0 ) y=0,v=0;
+        if ( y>=src_rows ) y=src_rows-1,v=0;

-    WORKTYPE data0 = convertToWT(loadpix(srcptr + mad24(y, srcstep, srcoffset + x*PIXSIZE)));
-    WORKTYPE data1 = convertToWT(loadpix(srcptr + mad24(y, srcstep, srcoffset + x_*PIXSIZE)));
-    WORKTYPE data2 = convertToWT(loadpix(srcptr + mad24(y_, srcstep, srcoffset + x*PIXSIZE)));
-    WORKTYPE data3 = convertToWT(loadpix(srcptr + mad24(y_, srcstep, srcoffset + x_*PIXSIZE)));
+        int y_ = INC(y, src_rows);
+        int x_ = INC(x, src_cols);

-    WORKTYPE val = mul24((WORKTYPE)mul24(U1, V1), data0) + mul24((WORKTYPE)mul24(U, V1), data1) +
-               mul24((WORKTYPE)mul24(U1, V), data2) + mul24((WORKTYPE)mul24(U, V), data3);
+#if depth <= 4
+        u = u * INTER_RESIZE_COEF_SCALE;
+        v = v * INTER_RESIZE_COEF_SCALE;

-    PIXTYPE uval = convertToDT((val + (1<<(CAST_BITS-1)))>>CAST_BITS);
+        int U = rint(u);
+        int V = rint(v);
+        int U1 = rint(INTER_RESIZE_COEF_SCALE - u);
+        int V1 = rint(INTER_RESIZE_COEF_SCALE - v);

-#else
-    float u1 = 1.f - u;
-    float v1 = 1.f - v;
-    WORKTYPE data0 = convertToWT(loadpix(srcptr + mad24(y, srcstep, srcoffset + x*PIXSIZE)));
-    WORKTYPE data1 = convertToWT(loadpix(srcptr + mad24(y, srcstep, srcoffset + x_*PIXSIZE)));
-    WORKTYPE data2 = convertToWT(loadpix(srcptr + mad24(y_, srcstep, srcoffset + x*PIXSIZE)));
-    WORKTYPE data3 = convertToWT(loadpix(srcptr + mad24(y_, srcstep, srcoffset + x_*PIXSIZE)));
+        WT data0 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))));
+        WT data1 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x_, TSIZE, src_offset))));
+        WT data2 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x, TSIZE, src_offset))));
+        WT data3 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x_, TSIZE, src_offset))));

-    PIXTYPE uval = u1 * v1 * data0 + u * v1 * data1 + u1 * v *data2 + u * v *data3;
+        WT val = mul24((WT)mul24(U1, V1), data0) + mul24((WT)mul24(U, V1), data1) +
+                   mul24((WT)mul24(U1, V), data2) + mul24((WT)mul24(U, V), data3);

+        T uval = convertToDT((val + (1<<(CAST_BITS-1)))>>CAST_BITS);
+#else
+        float u1 = 1.f - u;
+        float v1 = 1.f - v;
+        WT data0 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))));
+        WT data1 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x_, TSIZE, src_offset))));
+        WT data2 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x, TSIZE, src_offset))));
+        WT data3 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x_, TSIZE, src_offset))));
+
+        T uval = u1 * v1 * data0 + u * v1 * data1 + u1 * v *data2 + u * v *data3;
 #endif
-
-    if(dx < dstcols && dy < dstrows)
-    {
-        storepix(uval, dstptr + mad24(dy, dststep, dstoffset + dx*PIXSIZE));
+        storepix(uval, dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
    }
 }

 #elif defined INTER_NEAREST

-__kernel void resizeNN(__global const uchar* srcptr, int srcstep, int srcoffset,
-                       int srcrows, int srccols,
-                       __global uchar* dstptr, int dststep, int dstoffset,
-                       int dstrows, int dstcols,
+__kernel void resizeNN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
+                       __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                       float ifx, float ify)
 {
    int dx = get_global_id(0);
    int dy = get_global_id(1);

-    if( dx < dstcols && dy < dstrows )
+    if (dx < dst_cols && dy < dst_rows)
    {
-        float s1 = dx*ifx;
-        float s2 = dy*ify;
-        int sx = min(convert_int_rtz(s1), srccols-1);
-        int sy = min(convert_int_rtz(s2), srcrows-1);
+        float s1 = dx * ifx;
+        float s2 = dy * ify;
+        int sx = min(convert_int_rtz(s1), src_cols - 1);
+        int sy = min(convert_int_rtz(s2), src_rows - 1);

-        storepix(loadpix(srcptr + mad24(sy, srcstep, srcoffset + sx*PIXSIZE)),
-                 dstptr + mad24(dy, dststep, dstoffset + dx*PIXSIZE));
+        storepix(loadpix(srcptr + mad24(sy, src_step, mad24(sx, TSIZE, src_offset))),
+                 dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
    }
 }

@ -179,10 +209,10 @@ __kernel void resizeAREA_FAST(__global const uchar * src, int src_step, int src_
            int src_index = mad24(symap_tab[y + sy], src_step, src_offset);
            #pragma unroll
            for (int x = 0; x < XSCALE; ++x)
-                sum += convertToWTV(loadpix(src + src_index + sxmap_tab[sx + x]*PIXSIZE));
+                sum += convertToWTV(loadpix(src + mad24(sxmap_tab[sx + x], TSIZE, src_index)));
        }

-        storepix(convertToPIXTYPE(convertToWT2V(sum) * (WT2V)(SCALE)), dst + dst_index + dx*PIXSIZE);
+        storepix(convertToT(convertToWT2V(sum) * (WT2V)(SCALE)), dst + mad24(dx, TSIZE, dst_index));
    }
 }

@ -224,12 +254,12 @@ __kernel void resizeAREA(__global const uchar * src, int src_step, int src_offse
            for (int sx = sx0, xk = xk0; sx <= sx1; ++sx, ++xk)
            {
                WTV alpha = (WTV)(xalpha_tab[xk]);
-                buf += convertToWTV(loadpix(src + src_index + sx*PIXSIZE)) * alpha;
+                buf += convertToWTV(loadpix(src + mad24(sx, TSIZE, src_index))) * alpha;
            }
            sum += buf * beta;
        }

-        storepix(convertToPIXTYPE(sum), dst + dst_index + dx*PIXSIZE);
+        storepix(convertToT(sum), dst + mad24(dx, TSIZE, dst_index));
    }
 }

--- a/modules/imgproc/src/smooth.cpp
+++ b/modules/imgproc/src/smooth.cpp
@ -1109,20 +1109,27 @@ void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
        return;
 #endif

-#if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7)
+#if IPP_VERSION_X100 >= 801
    if( type == CV_32FC1 && sigma1 == sigma2 && ksize.width == ksize.height && sigma1 != 0.0 )
    {
        Mat src = _src.getMat(), dst = _dst.getMat();
        IppiSize roi = { src.cols, src.rows };
-        int bufSize = 0;
-        ippiFilterGaussGetBufferSize_32f_C1R(roi, ksize.width, &bufSize);
-        AutoBuffer<uchar> buf(bufSize+128);
-        if( ippiFilterGaussBorder_32f_C1R((const Ipp32f *)src.data, (int)src.step,
-                                          (Ipp32f *)dst.data, (int)dst.step,
-                                          roi, ksize.width, (Ipp32f)sigma1,
-                                          (IppiBorderType)borderType, 0.0,
-                                          alignPtr(&buf[0],32)) >= 0 )
-            return;
+        int specSize = 0, bufferSize = 0;
+        if (0 <=  ippiFilterGaussianGetBufferSize(roi, (Ipp32u)ksize.width, ipp32f, 1, &specSize, &bufferSize))
+        {
+            IppFilterGaussianSpec *pSpec = (IppFilterGaussianSpec*)ippMalloc(specSize);
+            Ipp8u *pBuffer = (Ipp8u*)ippMalloc(bufferSize);
+            // One status covers init and filtering, so the frees below also run when init fails (that path used to leak both buffers).
+            IppStatus sts = ippiFilterGaussianInit(roi, (Ipp32u)ksize.width, (Ipp32f)sigma1, (IppiBorderType)borderType, ipp32f, 1, pSpec, pBuffer);
+            if (0 <= sts)
+                sts = ippiFilterGaussianBorder_32f_C1R( (const Ipp32f *)src.data, (int)src.step,
+                                                        (Ipp32f *)dst.data, (int)dst.step,
+                                                        roi,  0.0, pSpec, pBuffer);
+            ippFree(pBuffer);
+            ippFree(pSpec);
+            if (0 <= sts)
+                return; // IPP produced the result; on any failure fall through to the generic implementation
+        }
    }
 #endif

@ -2180,11 +2187,19 @@ public:
          IppiSize kernel = {d, d};
          IppiSize roi={dst.cols, range.end - range.start};
          int bufsize=0;
-          ippiFilterBilateralGetBufSize_8u_C1R( ippiFilterBilateralGauss, roi, kernel, &bufsize);
+          if (0 > ippiFilterBilateralGetBufSize_8u_C1R( ippiFilterBilateralGauss, roi, kernel, &bufsize))
+          {
+              *ok = false;
+              return;
+          }
          AutoBuffer<uchar> buf(bufsize);
          IppiFilterBilateralSpec *pSpec = (IppiFilterBilateralSpec *)alignPtr(&buf[0], 32);
-          ippiFilterBilateralInit_8u_C1R( ippiFilterBilateralGauss, kernel, (Ipp32f)sigma_color, (Ipp32f)sigma_space, 1, pSpec );
-          if( ippiFilterBilateral_8u_C1R( src.ptr<uchar>(range.start) + radius * ((int)src.step[0] + 1), (int)src.step[0], dst.ptr<uchar>(range.start), (int)dst.step[0], roi, kernel, pSpec ) < 0)
+          if (0 > ippiFilterBilateralInit_8u_C1R( ippiFilterBilateralGauss, kernel, (Ipp32f)sigma_color, (Ipp32f)sigma_space, 1, pSpec ))
+          {
+              *ok = false;
+              return;
+          }
+          if (0 > ippiFilterBilateral_8u_C1R( src.ptr<uchar>(range.start) + radius * ((int)src.step[0] + 1), (int)src.step[0], dst.ptr<uchar>(range.start), (int)dst.step[0], roi, kernel, pSpec ))
              *ok = false;
      }
 private:
--- a/modules/imgproc/src/sumpixels.cpp
+++ b/modules/imgproc/src/sumpixels.cpp
@ -365,30 +365,32 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
    if( ( depth == CV_8U ) && ( sdepth == CV_32F || sdepth == CV_32S ) && ( !_tilted.needed() ) && ( !_sqsum.needed() || sqdepth == CV_64F ) && ( cn == 1 ) )
    {
+        IppStatus status = ippStsErr;
        IppiSize srcRoiSize = ippiSize( src.cols, src.rows );
        if( sdepth == CV_32F )
        {
            if( _sqsum.needed() )
            {
-                ippiSqrIntegral_8u32f64f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32f*)sum.data, (int)sum.step, (Ipp64f*)sqsum.data, (int)sqsum.step, srcRoiSize, 0, 0 );
+                status = ippiSqrIntegral_8u32f64f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32f*)sum.data, (int)sum.step, (Ipp64f*)sqsum.data, (int)sqsum.step, srcRoiSize, 0, 0 );
            }
            else
            {
-                ippiIntegral_8u32f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32f*)sum.data, (int)sum.step, srcRoiSize, 0 );
+                status = ippiIntegral_8u32f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32f*)sum.data, (int)sum.step, srcRoiSize, 0 );
            }
        }
        else if( sdepth == CV_32S )
        {
            if( _sqsum.needed() )
            {
-                ippiSqrIntegral_8u32s64f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32s*)sum.data, (int)sum.step, (Ipp64f*)sqsum.data, (int)sqsum.step, srcRoiSize, 0, 0 );
+                status = ippiSqrIntegral_8u32s64f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32s*)sum.data, (int)sum.step, (Ipp64f*)sqsum.data, (int)sqsum.step, srcRoiSize, 0, 0 );
            }
            else
            {
-                ippiIntegral_8u32s_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32s*)sum.data, (int)sum.step, srcRoiSize, 0 );
+                status = ippiIntegral_8u32s_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32s*)sum.data, (int)sum.step, srcRoiSize, 0 );
            }
        }
-        return;
+        if (0 <= status)
+            return;
    }
 #endif

--- a/modules/imgproc/test/ocl/test_filters.cpp
+++ b/modules/imgproc/test/ocl/test_filters.cpp
@ -316,7 +316,7 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, Bilateral, Combine(

 OCL_INSTANTIATE_TEST_CASE_P(Filter, LaplacianTest, Combine(
                            FILTER_TYPES,
-                            Values(1, 3), // kernel size
+                            Values(1, 3, 5), // kernel size
                            Values(Size(0, 0)), // not used
                            FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED,
                            Values(1.0, 0.2, 3.0), // kernel scale
--- a/modules/imgproc/test/ocl/test_histogram.cpp
+++ b/modules/imgproc/test/ocl/test_histogram.cpp
@ -13,6 +13,7 @@
 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2014, Itseez, Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // @Authors
@ -144,6 +145,37 @@ PARAM_TEST_CASE(CalcBackProject, MatDepth, int, bool)

        scale = randomDouble(0.1, 1);
    }
+
+    virtual void test_by_pict() // extra check: back projection of a fixed picture must be identical under OCL_OFF and OCL_ON
+    {
+        Mat frame1 = readImage("optflow/RubberWhale1.png", IMREAD_GRAYSCALE);
+        // the same picture as a UMat, used by the UMat overload below
+        UMat usrc;
+        frame1.copyTo(usrc);
+        int histSize = randomInt(3, 29);
+        float hue_range[] = { 0, 180 };
+        const float* ranges1 = { hue_range };
+        Mat hist1;
+
+        // compute the histogram of the picture and min-max normalize it to [0, 255]
+        calcHist(&frame1, 1, 0, Mat(), hist1, 1, &histSize, &ranges1, true, false);
+        normalize(hist1, hist1, 0, 255, NORM_MINMAX, -1, Mat());
+
+        Mat dst1;
+        UMat udst1, uhist1;
+        hist1.copyTo(uhist1);
+        std::vector<UMat> uims;
+        uims.push_back(usrc);
+        std::vector<float> urngs; // same 0..180 range as hue_range, in the vector form taken by the UMat overload
+        urngs.push_back(0);
+        urngs.push_back(180);
+        std::vector<int> chs;
+        chs.push_back(0);
+        // Mat overload under OCL_OFF versus UMat overload under OCL_ON; outputs must match with zero tolerance
+        OCL_OFF(calcBackProject(&frame1, 1, 0, hist1, dst1, &ranges1, 1, true));
+        OCL_ON(calcBackProject(uims, chs, uhist1, udst1, urngs, 1.0));
+        EXPECT_MAT_NEAR(dst1, udst1, 0.0);
+    }
 };

 //////////////////////////////// CalcBackProject //////////////////////////////////////////////
@ -157,7 +189,14 @@ OCL_TEST_P(CalcBackProject, Mat)
        OCL_OFF(cv::calcBackProject(images_roi, channels, hist_roi, dst_roi, ranges, scale));
        OCL_ON(cv::calcBackProject(uimages_roi, channels, uhist_roi, udst_roi, ranges, scale));

-        OCL_EXPECT_MATS_NEAR(dst, 0.0);
+        Size dstSize = dst_roi.size();
+        int nDiffs = (int)(0.03f*dstSize.height*dstSize.width);
+
+        //check if the dst mats are the same except 3% difference
+        EXPECT_MAT_N_DIFF(dst_roi, udst_roi, nDiffs);
+
+        //check in addition on given image
+        test_by_pict();
    }
 }

--- a/modules/imgproc/test/ocl/test_warp.cpp
+++ b/modules/imgproc/test/ocl/test_warp.cpp
@ -210,12 +210,15 @@ OCL_TEST_P(Resize, Mat)
 {
    for (int j = 0; j < test_loop_times; j++)
    {
+        int depth = CV_MAT_DEPTH(type);
+        double eps = depth <= CV_32S ? 1 : 1e-2;
+
        random_roi();

        OCL_OFF(cv::resize(src_roi, dst_roi, Size(), fx, fy, interpolation));
        OCL_ON(cv::resize(usrc_roi, udst_roi, Size(), fx, fy, interpolation));

-        Near(1.0);
+        Near(eps);
    }
 }

@ -328,8 +331,8 @@ OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarp, WarpPerspective, Combine(

 OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarp, Resize, Combine(
                            Values(CV_8UC1, CV_8UC4, CV_16UC2, CV_32FC1, CV_32FC4),
-                            Values(0.5, 1.5, 2.0),
-                            Values(0.5, 1.5, 2.0),
+                            Values(0.5, 1.5, 2.0, 0.2),
+                            Values(0.5, 1.5, 2.0, 0.2),
                            Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR),
                            Bool()));

--- a/modules/imgproc/test/test_bilateral_filter.cpp
+++ b/modules/imgproc/test/test_bilateral_filter.cpp
@ -264,7 +264,7 @@ namespace cvtest
            reference_dst.convertTo(reference_dst, type);
        }

-        double e = norm(reference_dst, _parallel_dst);
+        double e = cvtest::norm(reference_dst, _parallel_dst, NORM_L2);
        if (e > eps)
        {
            ts->printf(cvtest::TS::CONSOLE, "actual error: %g, expected: %g", e, eps);
--- a/modules/imgproc/test/test_connectedcomponents.cpp
+++ b/modules/imgproc/test/test_connectedcomponents.cpp
@ -91,12 +91,12 @@ void CV_ConnectedComponentsTest::run( int /* start_from */)
        exp = labelImage;
    }

-    if (0 != norm(labelImage > 0, exp > 0, NORM_INF))
+    if (0 != cvtest::norm(labelImage > 0, exp > 0, NORM_INF))
    {
        ts->set_failed_test_info( cvtest::TS::FAIL_MISMATCH );
        return;
    }
-    if (nLabels != norm(labelImage, NORM_INF)+1)
+    if (nLabels != cvtest::norm(labelImage, NORM_INF)+1)
    {
        ts->set_failed_test_info( cvtest::TS::FAIL_MISMATCH );
        return;
--- a/modules/imgproc/test/test_convhull.cpp
+++ b/modules/imgproc/test/test_convhull.cpp
@ -566,6 +566,8 @@ int CV_ConvHullTest::validate_test_results( int test_case_idx )
    hull = cvCreateMat( 1, hull_count, CV_32FC2 );
    mask = cvCreateMat( 1, hull_count, CV_8UC1 );
    cvZero( mask );
+    Mat _mask = cvarrToMat(mask);
+
    h = (CvPoint2D32f*)(hull->data.ptr);

    // extract convex hull points
@ -643,7 +645,7 @@ int CV_ConvHullTest::validate_test_results( int test_case_idx )
            mask->data.ptr[idx] = (uchar)1;
    }

-    if( cvNorm( mask, 0, CV_L1 ) != hull_count )
+    if( cvtest::norm( _mask, Mat::zeros(_mask.dims, _mask.size, _mask.type()), NORM_L1 ) != hull_count )
    {
        ts->printf( cvtest::TS::LOG, "Not every convex hull vertex coincides with some input point\n" );
        code = cvtest::TS::FAIL_BAD_ACCURACY;
--- a/modules/imgproc/test/test_houghLines.cpp
+++ b/modules/imgproc/test/test_houghLines.cpp
@ -137,7 +137,7 @@ void CV_HoughLinesTest::run_test(int type)
    if( exp_lines.size != lines.size )
        transpose(lines, lines);

-    if ( exp_lines.size != lines.size || norm(exp_lines, lines, NORM_INF) > 1e-4 )
+    if ( exp_lines.size != lines.size || cvtest::norm(exp_lines, lines, NORM_INF) > 1e-4 )
    {
        ts->set_failed_test_info(cvtest::TS::FAIL_MISMATCH);
        return;
--- a/modules/imgproc/test/test_imgwarp.cpp
+++ b/modules/imgproc/test/test_imgwarp.cpp
@ -1530,7 +1530,7 @@ TEST(Imgproc_resize_area, regression)
            }
    }

-    ASSERT_EQ(norm(one_channel_diff, cv::NORM_INF), 0);
+    ASSERT_EQ(cvtest::norm(one_channel_diff, cv::NORM_INF), 0);
 }


--- a/modules/imgproc/test/test_imgwarp_strict.cpp
+++ b/modules/imgproc/test/test_imgwarp_strict.cpp
@ -254,7 +254,7 @@ void CV_ImageWarpBaseTest::validate_results() const
 //                fabs(rD[dx] - D[dx]) < 250.0f &&
                rD[dx] <= 255.0f && D[dx] <= 255.0f && rD[dx] >= 0.0f && D[dx] >= 0.0f)
            {
-                PRINT_TO_LOG("\nNorm of the difference: %lf\n", norm(reference_dst, _dst, NORM_INF));
+                PRINT_TO_LOG("\nNorm of the difference: %lf\n", cvtest::norm(reference_dst, _dst, NORM_INF));
                PRINT_TO_LOG("Error in (dx, dy): (%d, %d)\n", dx / cn + 1, dy + 1);
                PRINT_TO_LOG("Tuple (rD, D): (%f, %f)\n", rD[dx], D[dx]);
                PRINT_TO_LOG("Dsize: (%d, %d)\n", dsize.width / cn, dsize.height);
--- a/modules/java/generator/rst_parser.py
+++ b/modules/java/generator/rst_parser.py
@ -1,5 +1,6 @@
 #!/usr/bin/env python

+from __future__ import print_function
 import os, sys, re, string, fnmatch
 allmodules = ["core", "flann", "imgproc", "ml", "highgui", "video", "features2d", "calib3d", "objdetect", "legacy", "contrib", "cuda", "androidcamera", "java", "python", "stitching", "ts", "photo", "nonfree", "videostab", "softcascade", "superres"]
 verbose = False
@ -141,10 +142,10 @@ class RstParser(object):
    def parse_section_safe(self, module_name, section_name, file_name, lineno, lines):
        try:
            self.parse_section(module_name, section_name, file_name, lineno, lines)
-        except AssertionError, args:
+        except AssertionError as args:
            if show_errors:
-                print >> sys.stderr, "RST parser error E%03d: assertion in \"%s\" at %s:%s" % (ERROR_001_SECTIONFAILURE, section_name, file_name, lineno)
-                print >> sys.stderr, "    Details: %s" % args
+                print("RST parser error E%03d: assertion in \"%s\" at %s:%s" % (ERROR_001_SECTIONFAILURE, section_name, file_name, lineno), file=sys.stderr)
+                print("    Details: %s" % args, file=sys.stderr)

    def parse_section(self, module_name, section_name, file_name, lineno, lines):
        self.sections_total += 1
@ -152,7 +153,7 @@ class RstParser(object):
        #if section_name.find(" ") >= 0 and section_name.find("::operator") < 0:
        if (section_name.find(" ") >= 0 and not bool(re.match(r"(\w+::)*operator\s*(\w+|>>|<<|\(\)|->|\+\+|--|=|==|\+=|-=)", section_name)) ) or section_name.endswith(":"):
            if show_errors:
-                print >> sys.stderr, "RST parser warning W%03d:  SKIPPED: \"%s\" File: %s:%s" % (WARNING_002_HDRWHITESPACE, section_name, file_name, lineno)
+                print("RST parser warning W%03d:  SKIPPED: \"%s\" File: %s:%s" % (WARNING_002_HDRWHITESPACE, section_name, file_name, lineno), file=sys.stderr)
            self.sections_skipped += 1
            return

@ -311,7 +312,7 @@ class RstParser(object):

        if fdecl.balance != 0:
            if show_critical_errors:
-                print >> sys.stderr, "RST parser error E%03d: invalid parentheses balance in \"%s\" at %s:%s" % (ERROR_003_PARENTHESES, section_name, file_name, lineno)
+                print("RST parser error E%03d: invalid parentheses balance in \"%s\" at %s:%s" % (ERROR_003_PARENTHESES, section_name, file_name, lineno), file=sys.stderr)
            return

        # save last parameter if needed
@ -328,7 +329,7 @@ class RstParser(object):
        elif func:
            if func["name"] in known_text_sections_names:
                if show_errors:
-                    print >> sys.stderr, "RST parser warning W%03d:  SKIPPED: \"%s\" File: %s:%s" % (WARNING_002_HDRWHITESPACE, section_name, file_name, lineno)
+                    print("RST parser warning W%03d:  SKIPPED: \"%s\" File: %s:%s" % (WARNING_002_HDRWHITESPACE, section_name, file_name, lineno), file=sys.stderr)
                self.sections_skipped += 1
            elif show_errors:
                self.print_info(func, True, sys.stderr)
@ -351,7 +352,7 @@ class RstParser(object):
            if l.find("\t") >= 0:
                whitespace_warnings += 1
                if whitespace_warnings <= max_whitespace_warnings and show_warnings:
-                    print >> sys.stderr, "RST parser warning W%03d: tab symbol instead of space is used at %s:%s" % (WARNING_004_TABS, doc, lineno)
+                    print("RST parser warning W%03d: tab symbol instead of space is used at %s:%s" % (WARNING_004_TABS, doc, lineno), file=sys.stderr)
                l = l.replace("\t", "    ")

            # handle first line
@ -388,8 +389,8 @@ class RstParser(object):

    def add_new_fdecl(self, func, decl):
        if decl.fdecl.endswith(";"):
-            print >> sys.stderr, "RST parser error E%03d: unexpected semicolon at the end of declaration in \"%s\" at %s:%s" \
-                        % (ERROR_011_EOLEXPECTED, func["name"], func["file"], func["line"])
+            print("RST parser error E%03d: unexpected semicolon at the end of declaration in \"%s\" at %s:%s" \
+                        % (ERROR_011_EOLEXPECTED, func["name"], func["file"], func["line"]), file=sys.stderr)
        decls =  func.get("decls", [])
        if (decl.lang == "C++" or decl.lang == "C"):
            rst_decl = self.cpp_parser.parse_func_decl_no_wrap(decl.fdecl)
@ -405,37 +406,37 @@ class RstParser(object):
            if show_errors:
                #check black_list
                if decl.name not in params_blacklist.get(func["name"], []):
-                    print >> sys.stderr, "RST parser error E%03d: redefinition of parameter \"%s\" in \"%s\" at %s:%s" \
-                        % (ERROR_005_REDEFENITIONPARAM, decl.name, func["name"], func["file"], func["line"])
+                    print("RST parser error E%03d: redefinition of parameter \"%s\" in \"%s\" at %s:%s" \
+                        % (ERROR_005_REDEFENITIONPARAM, decl.name, func["name"], func["file"], func["line"]), file=sys.stderr)
        else:
            params[decl.name] = decl.comment
            func["params"] = params

    def print_info(self, func, skipped=False, out = sys.stdout):
-        print >> out
+        print(file=out)
        if skipped:
-            print >> out, "SKIPPED DEFINITION:"
-        print >> out, "name:      %s" % (func.get("name","~empty~"))
-        print >> out, "file:      %s:%s" % (func.get("file","~empty~"), func.get("line","~empty~"))
-        print >> out, "is class:  %s" % func.get("isclass", False)
-        print >> out, "is struct: %s" % func.get("isstruct", False)
-        print >> out, "module:    %s" % func.get("module","~unknown~")
-        print >> out, "namespace: %s" % func.get("namespace", "~empty~")
-        print >> out, "class:     %s" % (func.get("class","~empty~"))
-        print >> out, "method:    %s" % (func.get("method","~empty~"))
-        print >> out, "brief:     %s" % (func.get("brief","~empty~"))
+            print("SKIPPED DEFINITION:", file=out)
+        print("name:      %s" % (func.get("name","~empty~")), file=out)
+        print("file:      %s:%s" % (func.get("file","~empty~"), func.get("line","~empty~")), file=out)
+        print("is class:  %s" % func.get("isclass", False), file=out)
+        print("is struct: %s" % func.get("isstruct", False), file=out)
+        print("module:    %s" % func.get("module","~unknown~"), file=out)
+        print("namespace: %s" % func.get("namespace", "~empty~"), file=out)
+        print("class:     %s" % (func.get("class","~empty~")), file=out)
+        print("method:    %s" % (func.get("method","~empty~")), file=out)
+        print("brief:     %s" % (func.get("brief","~empty~")), file=out)
        if "decls" in func:
-            print >> out, "declarations:"
+            print("declarations:", file=out)
            for d in func["decls"]:
-                print >> out, "     %7s: %s" % (d[0], re.sub(r"[ ]+", " ", d[1]))
+                print("     %7s: %s" % (d[0], re.sub(r"[ ]+", " ", d[1])), file=out)
        if "seealso" in func:
-            print >> out, "seealso:  ", func["seealso"]
+            print("seealso:  ", func["seealso"], file=out)
        if "params" in func:
-            print >> out, "parameters:"
+            print("parameters:", file=out)
            for name, comment in func["params"].items():
-                print >> out, "%23s:   %s" % (name, comment)
-        print >> out, "long:      %s" % (func.get("long","~empty~"))
-        print >> out
+                print("%23s:   %s" % (name, comment), file=out)
+        print("long:      %s" % (func.get("long","~empty~")), file=out)
+        print(file=out)

    def validate(self, func):
        if func.get("decls", None) is None:
@ -443,13 +444,13 @@ class RstParser(object):
                return False
        if func["name"] in self.definitions:
            if show_errors:
-                print >> sys.stderr, "RST parser error E%03d: \"%s\" from: %s:%s is already documented at %s:%s" \
-                    % (ERROR_006_REDEFENITIONFUNC, func["name"], func["file"], func["line"], self.definitions[func["name"]]["file"], self.definitions[func["name"]]["line"])
+                print("RST parser error E%03d: \"%s\" from: %s:%s is already documented at %s:%s" \
+                    % (ERROR_006_REDEFENITIONFUNC, func["name"], func["file"], func["line"], self.definitions[func["name"]]["file"], self.definitions[func["name"]]["line"]), file=sys.stderr)
            return False
        return self.validateParams(func)

    def validateParams(self, func):
-        documentedParams = func.get("params", {}).keys()
+        documentedParams = list(func.get("params", {}).keys())
        params = []

        for decl in func.get("decls", []):
@ -464,13 +465,13 @@ class RstParser(object):
        # 1. all params are documented
        for p in params:
            if p not in documentedParams and show_warnings:
-                print >> sys.stderr, "RST parser warning W%03d: parameter \"%s\" of \"%s\" is undocumented. %s:%s" % (WARNING_007_UNDOCUMENTEDPARAM, p, func["name"], func["file"], func["line"])
+                print("RST parser warning W%03d: parameter \"%s\" of \"%s\" is undocumented. %s:%s" % (WARNING_007_UNDOCUMENTEDPARAM, p, func["name"], func["file"], func["line"]), file=sys.stderr)

        # 2. only real params are documented
        for p in documentedParams:
            if p not in params and show_warnings:
                if p not in params_blacklist.get(func["name"], []):
-                    print >> sys.stderr, "RST parser warning W%03d: unexisting parameter \"%s\" of \"%s\" is documented at %s:%s" % (WARNING_008_MISSINGPARAM, p, func["name"], func["file"], func["line"])
+                    print("RST parser warning W%03d: unexisting parameter \"%s\" of \"%s\" is documented at %s:%s" % (WARNING_008_MISSINGPARAM, p, func["name"], func["file"], func["line"]), file=sys.stderr)
        return True

    def normalize(self, func):
@ -541,7 +542,7 @@ class RstParser(object):
                func["name"] = fname[4:]
                func["method"] = fname[4:]
            elif show_warnings:
-                print >> sys.stderr, "RST parser warning W%03d:  \"%s\" - section name is \"%s\" instead of \"%s\" at %s:%s" % (WARNING_009_HDRMISMATCH, fname, func["name"], fname[6:], func["file"], func["line"])
+                print("RST parser warning W%03d:  \"%s\" - section name is \"%s\" instead of \"%s\" at %s:%s" % (WARNING_009_HDRMISMATCH, fname, func["name"], fname[6:], func["file"], func["line"]), file=sys.stderr)
                #self.print_info(func)

    def normalizeText(self, s):
@ -632,11 +633,11 @@ class RstParser(object):
        return s

    def printSummary(self):
-        print "RST Parser Summary:"
-        print "  Total sections:   %s" % self.sections_total
-        print "  Skipped sections: %s" % self.sections_skipped
-        print "  Parsed  sections: %s" % self.sections_parsed
-        print "  Invalid sections: %s" % (self.sections_total - self.sections_parsed - self.sections_skipped)
+        print("RST Parser Summary:")
+        print("  Total sections:   %s" % self.sections_total)
+        print("  Skipped sections: %s" % self.sections_skipped)
+        print("  Parsed  sections: %s" % self.sections_parsed)
+        print("  Invalid sections: %s" % (self.sections_total - self.sections_parsed - self.sections_skipped))

        # statistic by language
        stat = {}
@ -651,12 +652,12 @@ class RstParser(object):
                for decl in d.get("decls", []):
                    stat[decl[0]] = stat.get(decl[0], 0) + 1

-        print
-        print "  classes documented:           %s" % classes
-        print "  structs documented:           %s" % structs
+        print()
+        print("  classes documented:           %s" % classes)
+        print("  structs documented:           %s" % structs)
        for lang in sorted(stat.items()):
-            print "  %7s functions documented: %s" % lang
-        print
+            print("  %7s functions documented: %s" % lang)
+        print()

 def mathReplace2(match):
    m = mathReplace(match)
@ -743,7 +744,7 @@ def mathReplace(match):

 if __name__ == "__main__":
    if len(sys.argv) < 2:
-        print "Usage:\n", os.path.basename(sys.argv[0]), " <module path>"
+        print("Usage:\n", os.path.basename(sys.argv[0]), " <module path>")
        exit(0)

    if len(sys.argv) >= 3:
@ -759,7 +760,7 @@ if __name__ == "__main__":
    module = sys.argv[1]

    if module != "all" and not os.path.isdir(os.path.join(rst_parser_dir, "../../" + module)):
-        print "RST parser error E%03d: module \"%s\" could not be found." % (ERROR_010_NOMODULE, module)
+        print("RST parser error E%03d: module \"%s\" could not be found." % (ERROR_010_NOMODULE, module))
        exit(1)

    parser = RstParser(hdr_parser.CppHeaderParser())
--- a/Show More
+++ b/Show More