Merge branch 'master' of https://github.com/Itseez/opencv into gdal_lan

Conflicts: modules/imgcodecs/include/opencv2/imgcodecs.hpp
10 years ago · a242556b1d
parent decdae515b 6e2a68fdf3
commit a242556b1d
260 changed files with 10174 additions and 5036 deletions
--- a/3rdparty/ippicv/downloader.cmake
+++ b/3rdparty/ippicv/downloader.cmake
@ -6,6 +6,8 @@
 #

 function(_icv_downloader)
+  # Commit SHA in the opencv_3rdparty repo
+  set(IPPICV_BINARIES_COMMIT "3d41df448b589aa076d9d27ace344d3ef709e4b9")
  # Define actual ICV versions
  if(APPLE)
    set(OPENCV_ICV_PACKAGE_NAME "ippicv_macosx_20141027.tgz")
@ -62,7 +64,7 @@ function(_icv_downloader)
      if(DEFINED ENV{OPENCV_ICV_URL})
        set(OPENCV_ICV_URL $ENV{OPENCV_ICV_URL})
      else()
-        set(OPENCV_ICV_URL "http://sourceforge.net/projects/opencvlibrary/files/3rdparty/ippicv")
+        set(OPENCV_ICV_URL "https://raw.githubusercontent.com/Itseez/opencv_3rdparty/${IPPICV_BINARIES_COMMIT}/ippicv")
      endif()
    endif()

--- a/3rdparty/libtiff/CMakeLists.txt
+++ b/3rdparty/libtiff/CMakeLists.txt
@ -93,6 +93,7 @@ ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4018 /wd4100 /wd4127 /wd4311 /wd4701 /wd
 ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4244) # vs2008
 ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4267 /wd4305 /wd4306) # vs2008 Win64
 ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4703) # vs2012
+ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4456 /wd4457 /wd4312) # vs2015

 ocv_warnings_disable(CMAKE_C_FLAGS /wd4267 /wd4244 /wd4018)

--- a/3rdparty/openexr/CMakeLists.txt
+++ b/3rdparty/openexr/CMakeLists.txt
@ -44,6 +44,7 @@ ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4018 /wd4099 /wd4100 /wd4101 /wd4127 /wd
 ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4334) # vs2005 Win64
 ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4244) # vs2008
 ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4267) # vs2008 Win64
+ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4456) # vs2015

 if(UNIX AND (CMAKE_COMPILER_IS_GNUCXX OR CV_ICC))
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
--- a/3rdparty/tbb/CMakeLists.txt
+++ b/3rdparty/tbb/CMakeLists.txt
@ -212,7 +212,13 @@ else()
 endif()

 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
-string(REPLACE "-Werror=non-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+
+# filter out flags that are not handled well by the TBB code
+foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
+  string(REPLACE "-Werror=non-virtual-dtor" "" ${var} "${${var}}")
+  string(REPLACE "-fvisibility=hidden" "" ${var} "${${var}}")
+  string(REPLACE "-fvisibility-inlines-hidden" "" ${var} "${${var}}")
+endforeach()

 if (WIN32)
  set(tbb_debug_postfix "_debug") # to fit pragmas in _windef.h inside TBB
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -85,6 +85,8 @@ if(DEFINED CMAKE_BUILD_TYPE)
  set_property( CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS ${CMAKE_CONFIGURATION_TYPES} )
 endif()

+enable_testing()
+
 project(OpenCV CXX C)

 if(MSVC)
@ -188,7 +190,7 @@ OCV_OPTION(WITH_QUICKTIME      "Use QuickTime for Video I/O insted of QTKit" OFF
 OCV_OPTION(WITH_TBB            "Include Intel TBB support"                   OFF  IF (NOT IOS AND NOT WINRT) )
 OCV_OPTION(WITH_OPENMP         "Include OpenMP support"                      OFF)
 OCV_OPTION(WITH_CSTRIPES       "Include C= support"                          OFF  IF (WIN32 AND NOT WINRT)  )
-OCV_OPTION(WITH_PTHREADS_PF    "Use pthreads-based parallel_for"             ON   IF (NOT WIN32) )
+OCV_OPTION(WITH_PTHREADS_PF    "Use pthreads-based parallel_for"             ON   IF (NOT WIN32 OR MINGW) )
 OCV_OPTION(WITH_TIFF           "Include TIFF support"                        ON   IF (NOT IOS) )
 OCV_OPTION(WITH_UNICAP         "Include Unicap support (GPL)"                OFF  IF (UNIX AND NOT APPLE AND NOT ANDROID) )
 OCV_OPTION(WITH_V4L            "Include Video 4 Linux support"               ON   IF (UNIX AND NOT ANDROID) )
@ -205,7 +207,8 @@ OCV_OPTION(WITH_OPENCLAMDBLAS  "Include AMD OpenCL BLAS library support"     ON
 OCV_OPTION(WITH_DIRECTX        "Include DirectX support"                     ON   IF (WIN32 AND NOT WINRT) )
 OCV_OPTION(WITH_INTELPERC      "Include Intel Perceptual Computing support"  OFF  IF (WIN32 AND NOT WINRT) )
 OCV_OPTION(WITH_IPP_A          "Include Intel IPP_A support"                 OFF  IF (MSVC OR X86 OR X86_64) )
-OCV_OPTION(WITH_VAAPI          "Include VA-API support"                      OFF  IF (UNIX AND NOT ANDROID) )
+OCV_OPTION(WITH_VA             "Include VA support"                          OFF  IF (UNIX AND NOT ANDROID) )
+OCV_OPTION(WITH_VA_INTEL       "Include Intel VA-API/OpenCL support"         OFF  IF (UNIX AND NOT ANDROID) )
 OCV_OPTION(WITH_GDAL           "Include GDAL Support"                        OFF  IF (NOT ANDROID AND NOT IOS AND NOT WINRT) )
 OCV_OPTION(WITH_GPHOTO2        "Include gPhoto2 library support"             ON   IF (UNIX AND NOT ANDROID) )

@ -1066,9 +1069,13 @@ if(DEFINED WITH_IPP_A)
 status("    Use IPP Async:"  HAVE_IPP_A       THEN "YES" ELSE NO)
 endif(DEFINED WITH_IPP_A)

-if(DEFINED WITH_VAAPI)
-status("    Use Intel VA-API:"  HAVE_VAAPI       THEN "YES (MSDK: ${VAAPI_MSDK_ROOT}  OpenCL: ${VAAPI_IOCL_ROOT})" ELSE NO)
-endif(DEFINED WITH_VAAPI)
+if(DEFINED WITH_VA)
+status("    Use VA:"            HAVE_VA          THEN "YES" ELSE NO)
+endif(DEFINED WITH_VA)
+
+if(DEFINED WITH_VA_INTEL)
+status("    Use Intel VA-API/OpenCL:"  HAVE_VA_INTEL       THEN "YES (MSDK: ${VA_INTEL_MSDK_ROOT}  OpenCL: ${VA_INTEL_IOCL_ROOT})" ELSE NO)
+endif(DEFINED WITH_VA_INTEL)

 status("    Use Eigen:"      HAVE_EIGEN       THEN "YES (ver ${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION})" ELSE NO)
 status("    Use Cuda:"       HAVE_CUDA        THEN "YES (ver ${CUDA_VERSION_STRING})" ELSE NO)
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -0,0 +1,3 @@
+## Contributing guidelines
+
+All guidelines for contributing to the OpenCV repository can be found at [`How to contribute guideline`](https://github.com/Itseez/opencv/wiki/How_to_contribute).
--- a/cmake/OpenCVCompilerOptions.cmake
+++ b/cmake/OpenCVCompilerOptions.cmake
@ -362,5 +362,7 @@ if(MSVC)
  if(NOT ENABLE_NOISY_WARNINGS)
    ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4251) # class 'std::XXX' needs to have dll-interface to be used by clients of YYY
    ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4324) # 'struct_name' : structure was padded due to __declspec(align())
+    ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4275) # non dll-interface class 'std::exception' used as base for dll-interface class 'cv::Exception'
+    ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4589) # Constructor of abstract class 'cv::ORB' ignores initializer for virtual base class 'cv::Algorithm'
  endif()
 endif()
--- a/cmake/OpenCVDetectAndroidSDK.cmake
+++ b/cmake/OpenCVDetectAndroidSDK.cmake
@ -240,7 +240,7 @@ macro(add_android_project target path)
    foreach(f ${android_proj_files})
      add_custom_command(
        OUTPUT "${android_proj_bin_dir}/${f}"
-        COMMAND ${CMAKE_COMMAND} -E copy "${path}/${f}" "${android_proj_bin_dir}/${f}"
+        COMMAND ${CMAKE_COMMAND} -E copy_if_different "${path}/${f}" "${android_proj_bin_dir}/${f}"
        MAIN_DEPENDENCY "${path}/${f}"
        COMMENT "Copying ${f}")
      list(APPEND android_proj_file_deps "${path}/${f}" "${android_proj_bin_dir}/${f}")
@ -353,7 +353,7 @@ macro(add_android_project target path)
    set(__android_project_chain ${target} CACHE INTERNAL "auxiliary variable used for Android progects chaining")

    # put the final .apk to the OpenCV's bin folder
-    add_custom_command(TARGET ${target} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy "${android_proj_bin_dir}/bin/${target}-debug.apk" "${OpenCV_BINARY_DIR}/bin/${target}.apk")
+    add_custom_command(TARGET ${target} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${android_proj_bin_dir}/bin/${target}-debug.apk" "${OpenCV_BINARY_DIR}/bin/${target}.apk")
    if(INSTALL_ANDROID_EXAMPLES AND "${target}" MATCHES "^example-")
      #apk
      install(FILES "${OpenCV_BINARY_DIR}/bin/${target}.apk" DESTINATION "samples" COMPONENT samples)
--- a/cmake/OpenCVDetectCXXCompiler.cmake
+++ b/cmake/OpenCVDetectCXXCompiler.cmake
@ -76,17 +76,27 @@ elseif(CMAKE_COMPILER_IS_GNUCXX)
                OUTPUT_STRIP_TRAILING_WHITESPACE)

  # Typical output in CMAKE_OPENCV_GCC_VERSION_FULL: "c+//0 (whatever) 4.2.3 (...)"
-  # Look for the version number
+  # Look for the version number, major.minor.build
  string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" CMAKE_GCC_REGEX_VERSION "${CMAKE_OPENCV_GCC_VERSION_FULL}")
-  if(NOT CMAKE_GCC_REGEX_VERSION)
+  if(NOT CMAKE_GCC_REGEX_VERSION)#major.minor
    string(REGEX MATCH "[0-9]+\\.[0-9]+" CMAKE_GCC_REGEX_VERSION "${CMAKE_OPENCV_GCC_VERSION_FULL}")
  endif()

-  # Split the three parts:
-  string(REGEX MATCHALL "[0-9]+" CMAKE_OPENCV_GCC_VERSIONS "${CMAKE_GCC_REGEX_VERSION}")
-
-  list(GET CMAKE_OPENCV_GCC_VERSIONS 0 CMAKE_OPENCV_GCC_VERSION_MAJOR)
-  list(GET CMAKE_OPENCV_GCC_VERSIONS 1 CMAKE_OPENCV_GCC_VERSION_MINOR)
+  if(CMAKE_GCC_REGEX_VERSION)
+    # Split the parts:
+    string(REGEX MATCHALL "[0-9]+" CMAKE_OPENCV_GCC_VERSIONS "${CMAKE_GCC_REGEX_VERSION}")
+
+    list(GET CMAKE_OPENCV_GCC_VERSIONS 0 CMAKE_OPENCV_GCC_VERSION_MAJOR)
+    list(GET CMAKE_OPENCV_GCC_VERSIONS 1 CMAKE_OPENCV_GCC_VERSION_MINOR)
+  else()#compiler returned just the major version number
+    string(REGEX MATCH "[0-9]+" CMAKE_GCC_REGEX_VERSION "${CMAKE_OPENCV_GCC_VERSION_FULL}")
+    if(NOT CMAKE_GCC_REGEX_VERSION)#compiler did not return anything reasonable
+      set(CMAKE_GCC_REGEX_VERSION "0")
+      message(WARNING "GCC version not detected!")
+    endif()
+    set(CMAKE_OPENCV_GCC_VERSION_MAJOR ${CMAKE_GCC_REGEX_VERSION})
+    set(CMAKE_OPENCV_GCC_VERSION_MINOR 0)
+  endif()

  set(CMAKE_OPENCV_GCC_VERSION ${CMAKE_OPENCV_GCC_VERSION_MAJOR}${CMAKE_OPENCV_GCC_VERSION_MINOR})
  math(EXPR CMAKE_OPENCV_GCC_VERSION_NUM "${CMAKE_OPENCV_GCC_VERSION_MAJOR}*100 + ${CMAKE_OPENCV_GCC_VERSION_MINOR}")
@ -116,6 +126,12 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
  set(AARCH64 1)
 endif()

+# Workaround for 32-bit operating systems on 64-bit x86_64 processor
+if(X86_64 AND CMAKE_SIZEOF_VOID_P EQUAL 4 AND NOT FORCE_X86_64)  # a true FORCE_X86_64 disables this downgrade
+  message(STATUS "sizeof(void*) = 4 on x86 / x86_64 processor. Assume 32-bit compilation mode (X86=1)")
+  unset(X86_64)  # switch the architecture flags from X86_64 to X86
+  set(X86 1)
+endif()

 # Similar code exists in OpenCVConfig.cmake
 if(NOT DEFINED OpenCV_STATIC)
--- a/cmake/OpenCVFindLibsVideo.cmake
+++ b/cmake/OpenCVFindLibsVideo.cmake
@ -318,10 +318,18 @@ if(WITH_GPHOTO2)
  CHECK_MODULE(libgphoto2 HAVE_GPHOTO2)
 endif(WITH_GPHOTO2)

-# --- VA-API ---
-if(WITH_VAAPI)
-  include("${OpenCV_SOURCE_DIR}/cmake/OpenCVFindVAAPI.cmake")
-  if(VAAPI_IOCL_INCLUDE_DIR)
-    ocv_include_directories(${VAAPI_IOCL_INCLUDE_DIR})
+# --- VA & VA_INTEL ---
+if(WITH_VA_INTEL)
+  include("${OpenCV_SOURCE_DIR}/cmake/OpenCVFindVA_INTEL.cmake")
+  if(VA_INTEL_IOCL_INCLUDE_DIR)
+    ocv_include_directories(${VA_INTEL_IOCL_INCLUDE_DIR})
  endif()
-endif(WITH_VAAPI)
+  set(WITH_VA YES)
+endif(WITH_VA_INTEL)
+
+if(WITH_VA)
+  include("${OpenCV_SOURCE_DIR}/cmake/OpenCVFindVA.cmake")
+  if(VA_INCLUDE_DIR)
+    ocv_include_directories(${VA_INCLUDE_DIR})
+  endif()
+endif(WITH_VA)
--- a/cmake/OpenCVFindVA.cmake
+++ b/cmake/OpenCVFindVA.cmake
@ -0,0 +1,19 @@
+# Main variables:
+# HAVE_VA for conditional compilation OpenCV with/without libva; VA_INCLUDE_DIR and VA_LIBRARIES are set for using it
+
+if(UNIX AND NOT ANDROID)  # the header search below is only attempted on non-Android UNIX
+    find_path(
+    VA_INCLUDE_DIR
+    NAMES va/va.h
+    PATHS "/usr/include"
+    PATH_SUFFIXES include
+    DOC "Path to libva headers")
+endif()
+
+if(VA_INCLUDE_DIR)  # detection is based on the header path alone
+    set(HAVE_VA TRUE)
+    set(VA_LIBRARIES "-lva" "-lva-drm")  # plain linker flags; this file does not search for the libraries themselves
+else()
+    set(HAVE_VA FALSE)
+    message(WARNING "libva installation is not found.")
+endif()
--- a/cmake/OpenCVFindVAAPI.cmake
+++ b/cmake/OpenCVFindVAAPI.cmake
@ -1,44 +0,0 @@
-# Main variables:
-# VAAPI_MSDK_INCLUDE_DIR and VAAPI_IOCL_INCLUDE_DIR to use VAAPI
-# HAVE_VAAPI for conditional compilation OpenCV with/without VAAPI
-
-# VAAPI_MSDK_ROOT - root of Intel MSDK installation
-# VAAPI_IOCL_ROOT - root of Intel OCL installation
-
-if(UNIX AND NOT ANDROID)
-    if($ENV{VAAPI_MSDK_ROOT})
-        set(VAAPI_MSDK_ROOT $ENV{VAAPI_MSDK_ROOT})
-    else()
-        set(VAAPI_MSDK_ROOT "/opt/intel/mediasdk")
-    endif()
-
-    if($ENV{VAAPI_IOCL_ROOT})
-        set(VAAPI_IOCL_ROOT $ENV{VAAPI_IOCL_ROOT})
-    else()
-        set(VAAPI_IOCL_ROOT "/opt/intel/opencl")
-    endif()
-
-    find_path(
-    VAAPI_MSDK_INCLUDE_DIR
-    NAMES mfxdefs.h
-    PATHS ${VAAPI_MSDK_ROOT}
-    PATH_SUFFIXES include
-    DOC "Path to Intel MSDK headers")
-
-    find_path(
-    VAAPI_IOCL_INCLUDE_DIR
-    NAMES CL/va_ext.h
-    PATHS ${VAAPI_IOCL_ROOT}
-    PATH_SUFFIXES include
-    DOC "Path to Intel OpenCL headers")
-endif()
-
-if(VAAPI_MSDK_INCLUDE_DIR AND VAAPI_IOCL_INCLUDE_DIR)
-    set(HAVE_VAAPI TRUE)
-    set(VAAPI_EXTRA_LIBS "-lva" "-lva-drm")
-else()
-    set(HAVE_VAAPI FALSE)
-    message(WARNING "Intel MSDK & OpenCL installation is not found.")
-endif()
-
-mark_as_advanced(FORCE VAAPI_MSDK_INCLUDE_DIR VAAPI_IOCL_INCLUDE_DIR)
--- a/cmake/OpenCVFindVA_INTEL.cmake
+++ b/cmake/OpenCVFindVA_INTEL.cmake
@ -0,0 +1,44 @@
+# Main variables:
+# VA_INTEL_MSDK_INCLUDE_DIR and VA_INTEL_IOCL_INCLUDE_DIR to use VA_INTEL
+# HAVE_VA_INTEL for conditional compilation OpenCV with/without VA_INTEL
+
+# VA_INTEL_MSDK_ROOT - root of Intel MSDK installation
+# VA_INTEL_IOCL_ROOT - root of Intel OCL installation
+
+if(UNIX AND NOT ANDROID)  # Intel MSDK / OpenCL headers are only searched for on non-Android UNIX
+    if(DEFINED ENV{VA_INTEL_MSDK_ROOT})  # presence test; if(<path>) would treat the path as a variable name and never be true
+        set(VA_INTEL_MSDK_ROOT $ENV{VA_INTEL_MSDK_ROOT})
+    else()
+        set(VA_INTEL_MSDK_ROOT "/opt/intel/mediasdk")  # default root when the environment gives no override
+    endif()
+
+    if(DEFINED ENV{VA_INTEL_IOCL_ROOT})  # same presence test for the Intel OpenCL root
+        set(VA_INTEL_IOCL_ROOT $ENV{VA_INTEL_IOCL_ROOT})
+    else()
+        set(VA_INTEL_IOCL_ROOT "/opt/intel/opencl")  # default root when the environment gives no override
+    endif()
+
+    find_path(
+    VA_INTEL_MSDK_INCLUDE_DIR
+    NAMES mfxdefs.h
+    PATHS ${VA_INTEL_MSDK_ROOT}
+    PATH_SUFFIXES include
+    DOC "Path to Intel MSDK headers")
+
+    find_path(
+    VA_INTEL_IOCL_INCLUDE_DIR
+    NAMES CL/va_ext.h
+    PATHS ${VA_INTEL_IOCL_ROOT}
+    PATH_SUFFIXES include
+    DOC "Path to Intel OpenCL headers")
+endif()
+
+if(VA_INTEL_MSDK_INCLUDE_DIR AND VA_INTEL_IOCL_INCLUDE_DIR)
+    set(HAVE_VA_INTEL TRUE)
+    set(VA_INTEL_LIBRARIES "-lva" "-lva-drm")
+else()
+    set(HAVE_VA_INTEL FALSE)
+    message(WARNING "Intel MSDK & OpenCL installation is not found.")
+endif()
+
+mark_as_advanced(FORCE VA_INTEL_MSDK_INCLUDE_DIR VA_INTEL_IOCL_INCLUDE_DIR)
--- a/cmake/OpenCVModule.cmake
+++ b/cmake/OpenCVModule.cmake
@ -26,6 +26,7 @@

 # To control the setup of the module you could also set:
 # the_description - text to be used as current module description
+# the_label - label for current module
 # OPENCV_MODULE_TYPE - STATIC|SHARED - set to force override global settings for current module
 # OPENCV_MODULE_IS_PART_OF_WORLD - ON|OFF (default ON) - should the module be added to the opencv_world?
 # BUILD_${the_module}_INIT - ON|OFF (default ON) - initial value for BUILD_${the_module}
@ -56,6 +57,8 @@ foreach(mod ${OPENCV_MODULES_BUILD} ${OPENCV_MODULES_DISABLED_USER} ${OPENCV_MOD
  if(HAVE_${mod})
    unset(HAVE_${mod} CACHE)
  endif()
+  unset(OPENCV_MODULE_${mod}_DEPS CACHE)
+  unset(OPENCV_MODULE_${mod}_DEPS_EXT CACHE)
  unset(OPENCV_MODULE_${mod}_REQ_DEPS CACHE)
  unset(OPENCV_MODULE_${mod}_OPT_DEPS CACHE)
  unset(OPENCV_MODULE_${mod}_PRIVATE_REQ_DEPS CACHE)
@ -189,6 +192,15 @@ macro(ocv_add_module _name)
      set(OPENCV_MODULE_${the_module}_IS_PART_OF_WORLD OFF CACHE INTERNAL "")
    endif()

+    if(NOT DEFINED the_label)
+      if(OPENCV_PROCESSING_EXTRA_MODULES)
+        set(the_label "Extra")
+      else()
+        set(the_label "Main")
+      endif()
+    endif()
+    set(OPENCV_MODULE_${the_module}_LABEL "${the_label};${the_module}" CACHE INTERNAL "")
+
    if(BUILD_${the_module})
      set(OPENCV_MODULES_BUILD ${OPENCV_MODULES_BUILD} "${the_module}" CACHE INTERNAL "List of OpenCV modules included into the build")
    else()
@ -471,7 +483,6 @@ function(__ocv_resolve_dependencies)
  # reorder dependencies
  foreach(m ${OPENCV_MODULES_BUILD})
    __ocv_sort_modules_by_deps(OPENCV_MODULE_${m}_DEPS)
-    ocv_list_sort(OPENCV_MODULE_${m}_DEPS_EXT)

    set(LINK_DEPS ${OPENCV_MODULE_${m}_DEPS})

@ -762,6 +773,10 @@ macro(_ocv_create_module)
  unset(sub_links)
  unset(cuda_objs)

+  set_target_properties(${the_module} PROPERTIES LABELS "${OPENCV_MODULE_${the_module}_LABEL};Module")
+  set_source_files_properties(${OPENCV_MODULE_${the_module}_HEADERS} ${OPENCV_MODULE_${the_module}_SOURCES} ${${the_module}_pch}
+    PROPERTIES LABELS "${OPENCV_MODULE_${the_module}_LABEL};Module")
+
  ocv_target_link_libraries(${the_module} ${OPENCV_MODULE_${the_module}_DEPS_TO_LINK})
  ocv_target_link_libraries(${the_module} LINK_INTERFACE_LIBRARIES ${OPENCV_MODULE_${the_module}_DEPS_TO_LINK})
  ocv_target_link_libraries(${the_module} ${OPENCV_MODULE_${the_module}_DEPS_EXT} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${ARGN})
@ -969,6 +984,10 @@ function(ocv_add_perf_tests)
      ocv_target_link_libraries(${the_target} ${perf_deps} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS})
      add_dependencies(opencv_perf_tests ${the_target})

+      set_target_properties(${the_target} PROPERTIES LABELS "${OPENCV_MODULE_${the_module}_LABEL};PerfTest")
+      set_source_files_properties(${OPENCV_PERF_${the_module}_SOURCES} ${${the_target}_pch}
+        PROPERTIES LABELS "${OPENCV_MODULE_${the_module}_LABEL};PerfTest")
+
      # Additional target properties
      set_target_properties(${the_target} PROPERTIES
        DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
@ -989,6 +1008,12 @@ function(ocv_add_perf_tests)
      if(NOT BUILD_opencv_world)
        _ocv_add_precompiled_headers(${the_target})
      endif()
+
+      ocv_add_test_from_target("${the_target}" "Performance" "${the_target}")
+      ocv_add_test_from_target("opencv_sanity_${name}" "Sanity" "${the_target}"
+                               "--perf_min_samples=1"
+                               "--perf_force_samples=1"
+                               "--perf_verify_sanity")
    else(OCV_DEPENDENCIES_FOUND)
      # TODO: warn about unsatisfied dependencies
    endif(OCV_DEPENDENCIES_FOUND)
@ -1035,6 +1060,10 @@ function(ocv_add_accuracy_tests)
      ocv_target_link_libraries(${the_target} ${test_deps} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS})
      add_dependencies(opencv_tests ${the_target})

+      set_target_properties(${the_target} PROPERTIES LABELS "${OPENCV_MODULE_${the_module}_LABEL};AccuracyTest")
+      set_source_files_properties(${OPENCV_TEST_${the_module}_SOURCES} ${${the_target}_pch}
+        PROPERTIES LABELS "${OPENCV_MODULE_${the_module}_LABEL};AccuracyTest")
+
      # Additional target properties
      set_target_properties(${the_target} PROPERTIES
        DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
@ -1045,21 +1074,11 @@ function(ocv_add_accuracy_tests)
        set_target_properties(${the_target} PROPERTIES FOLDER "tests accuracy")
      endif()

-      enable_testing()
-      get_target_property(LOC ${the_target} LOCATION)
-      add_test(${the_target} "${LOC}")
-
-      if(WINRT)
-        # removing APPCONTAINER from tests to run from console
-        # look for detailed description inside of ocv_create_module macro above
-        add_custom_command(TARGET "opencv_test_${name}"
-                           POST_BUILD
-                           COMMAND link.exe /edit /APPCONTAINER:NO $(TargetPath))
-      endif()
-
      if(NOT BUILD_opencv_world)
        _ocv_add_precompiled_headers(${the_target})
      endif()
+
+      ocv_add_test_from_target("${the_target}" "Accuracy" "${the_target}")
    else(OCV_DEPENDENCIES_FOUND)
      # TODO: warn about unsatisfied dependencies
    endif(OCV_DEPENDENCIES_FOUND)
@ -1092,6 +1111,10 @@ function(ocv_add_samples)
        ocv_target_link_libraries(${the_target} ${samples_deps})
        set_target_properties(${the_target} PROPERTIES PROJECT_LABEL "(sample) ${name}")

+        set_target_properties(${the_target} PROPERTIES LABELS "${OPENCV_MODULE_${the_module}_LABEL};Sample")
+        set_source_files_properties("${source}"
+          PROPERTIES LABELS "${OPENCV_MODULE_${the_module}_LABEL};Sample")
+
        if(ENABLE_SOLUTION_FOLDERS)
          set_target_properties(${the_target} PROPERTIES
            OUTPUT_NAME "${module_id}-example-${name}"
--- a/cmake/OpenCVPCHSupport.cmake
+++ b/cmake/OpenCVPCHSupport.cmake
@ -277,7 +277,7 @@ MACRO(ADD_PRECOMPILED_HEADER _targetName _input)

    ADD_CUSTOM_COMMAND(
      OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${_name}"
-      COMMAND ${CMAKE_COMMAND} -E copy  "${_input}" "${CMAKE_CURRENT_BINARY_DIR}/${_name}" # ensure same directory! Required by gcc
+      COMMAND ${CMAKE_COMMAND} -E copy_if_different "${_input}" "${CMAKE_CURRENT_BINARY_DIR}/${_name}" # ensure same directory! Required by gcc
      DEPENDS "${_input}"
      )

--- a/cmake/OpenCVPackaging.cmake
+++ b/cmake/OpenCVPackaging.cmake
@ -20,21 +20,24 @@ endif(NOT OPENCV_CUSTOM_PACKAGE_INFO)

 #arch
 if(X86)
-  set(CPACK_DEBIAN_ARCHITECTURE "i386")
+  set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "i386")
  set(CPACK_RPM_PACKAGE_ARCHITECTURE "i686")
 elseif(X86_64)
-  set(CPACK_DEBIAN_ARCHITECTURE "amd64")
+  set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "amd64")
  set(CPACK_RPM_PACKAGE_ARCHITECTURE "x86_64")
 elseif(ARM)
-  set(CPACK_DEBIAN_ARCHITECTURE "armhf")
+  set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "armhf")
  set(CPACK_RPM_PACKAGE_ARCHITECTURE "armhf")
+elseif(AARCH64)
+  set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "arm64")
+  set(CPACK_RPM_PACKAGE_ARCHITECTURE "aarch64")
 else()
-  set(CPACK_DEBIAN_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR})
+  set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR})
  set(CPACK_RPM_PACKAGE_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR})
 endif()

 if(CPACK_GENERATOR STREQUAL "DEB")
-  set(OPENCV_PACKAGE_ARCH_SUFFIX ${CPACK_DEBIAN_ARCHITECTURE})
+  set(OPENCV_PACKAGE_ARCH_SUFFIX ${CPACK_DEBIAN_PACKAGE_ARCHITECTURE})
 elseif(CPACK_GENERATOR STREQUAL "RPM")
  set(OPENCV_PACKAGE_ARCH_SUFFIX ${CPACK_RPM_PACKAGE_ARCHITECTURE})
 else()
@ -96,6 +99,46 @@ if(HAVE_CUDA)
  set(CPACK_COMPONENT_dev_DEPENDS libs)
 endif()

+if(HAVE_TBB AND NOT BUILD_TBB)
+  if(CPACK_DEB_DEV_PACKAGE_DEPENDS)
+    set(CPACK_DEB_DEV_PACKAGE_DEPENDS "${CPACK_DEB_DEV_PACKAGE_DEPENDS}, libtbb-dev")
+  else()
+    set(CPACK_DEB_DEV_PACKAGE_DEPENDS "libtbb-dev")
+  endif()
+endif()
+
+set(STD_OPENCV_LIBS opencv-data)
+set(STD_OPENCV_DEV libopencv-dev)
+
+foreach(module calib3d core cudaarithm cudabgsegm cudacodec cudafeatures2d cudafilters
+               cudaimgproc cudalegacy cudaobjdetect cudaoptflow cudastereo cudawarping
+               cudev features2d flann hal highgui imgcodecs imgproc ml objdetect ocl
+               photo shape stitching superres ts video videoio videostab viz)
+  if(HAVE_opencv_${module})
+    list(APPEND STD_OPENCV_LIBS "libopencv-${module}3.0")
+    list(APPEND STD_OPENCV_DEV "libopencv-${module}-dev")
+  endif()
+endforeach()
+
+string(REPLACE ";" ", " CPACK_COMPONENT_LIBS_CONFLICTS "${STD_OPENCV_LIBS}")
+string(REPLACE ";" ", " CPACK_COMPONENT_LIBS_PROVIDES "${STD_OPENCV_LIBS}")
+string(REPLACE ";" ", " CPACK_COMPONENT_LIBS_REPLACES "${STD_OPENCV_LIBS}")
+
+string(REPLACE ";" ", " CPACK_COMPONENT_DEV_CONFLICTS "${STD_OPENCV_DEV}")
+string(REPLACE ";" ", " CPACK_COMPONENT_DEV_PROVIDES "${STD_OPENCV_DEV}")
+string(REPLACE ";" ", " CPACK_COMPONENT_DEV_REPLACES "${STD_OPENCV_DEV}")
+
+set(CPACK_COMPONENT_PYTHON_CONFLICTS python-opencv)
+set(CPACK_COMPONENT_PYTHON_PROVIDES python-opencv)
+set(CPACK_COMPONENT_PYTHON_REPLACES python-opencv)
+
+set(CPACK_COMPONENT_JAVA_CONFLICTS "libopencv3.0-java, libopencv3.0-jni")
+set(CPACK_COMPONENT_JAVA_PROVIDES "libopencv3.0-java, libopencv3.0-jni")
+set(CPACK_COMPONENT_JAVA_REPLACES "libopencv3.0-java, libopencv3.0-jni")
+
+set(CPACK_COMPONENT_DOCS_CONFLICTS opencv-doc)
+set(CPACK_COMPONENT_SAMPLES_CONFLICTS opencv-doc)
+
 if(NOT OPENCV_CUSTOM_PACKAGE_INFO)
  set(CPACK_COMPONENT_LIBS_DESCRIPTION "Open Computer Vision Library")
  set(CPACK_DEBIAN_COMPONENT_LIBS_NAME "lib${CMAKE_PROJECT_NAME}")
--- a/cmake/OpenCVUtils.cmake
+++ b/cmake/OpenCVUtils.cmake
@ -911,3 +911,32 @@ function(ocv_download)

  set(DOWNLOAD_PACKAGE_LOCATION ${DOWNLOAD_TARGET} PARENT_SCOPE)
 endfunction()
+
+function(ocv_add_test_from_target test_name test_kind the_target)  # registers target <the_target> as CTest test <test_name>; extra arguments are passed to the test command
+  if(CMAKE_VERSION VERSION_GREATER "2.8" AND NOT CMAKE_CROSSCOMPILING)  # otherwise the function does nothing
+    if(NOT "${test_kind}" MATCHES "^(Accuracy|Performance|Sanity)$")  # only these three kinds are accepted
+      message(FATAL_ERROR "Unknown test kind : ${test_kind}")
+    endif()
+    if(NOT TARGET "${the_target}")  # the command below needs an existing CMake target
+      message(FATAL_ERROR "${the_target} is not a CMake target")
+    endif()
+
+    string(TOLOWER "${test_kind}" test_kind_lower)
+    set(test_report_dir "${CMAKE_BINARY_DIR}/test-reports/${test_kind_lower}")  # one report directory per test kind
+    file(MAKE_DIRECTORY "${test_report_dir}")  # created at configure time
+
+    add_test(NAME "${test_name}"
+      COMMAND "${the_target}"
+              "--gtest_output=xml:${the_target}.xml"  # relative path, resolved against WORKING_DIRECTORY set below
+              ${ARGN})
+
+    set_tests_properties("${test_name}" PROPERTIES
+      LABELS "${OPENCV_MODULE_${the_module}_LABEL};${test_kind}"  # the_module is not a parameter; it is read from the calling scope
+      WORKING_DIRECTORY "${test_report_dir}")
+
+    if(OPENCV_TEST_DATA_PATH)  # forward the test data location to the test process when it is set
+      set_tests_properties("${test_name}" PROPERTIES
+        ENVIRONMENT "OPENCV_TEST_DATA_PATH=${OPENCV_TEST_DATA_PATH}")
+    endif()
+  endif()
+endfunction()
--- a/cmake/templates/cvconfig.h.in
+++ b/cmake/templates/cvconfig.h.in
@ -189,5 +189,8 @@
 /* gPhoto2 library */
 #cmakedefine HAVE_GPHOTO2

-/* Intel VA-API */
-#cmakedefine HAVE_VAAPI
+/* VA library (libva) */
+#cmakedefine HAVE_VA
+
+/* Intel VA-API/OpenCL */
+#cmakedefine HAVE_VA_INTEL
--- a/cmake/templates/opencv_run_all_tests_unix.sh.in
+++ b/cmake/templates/opencv_run_all_tests_unix.sh.in
@ -55,6 +55,15 @@ OPENCV_TEST_PATH=@CMAKE_INSTALL_PREFIX@/@OPENCV_TEST_INSTALL_PATH@
 OPENCV_PYTHON_TESTS=@OPENCV_PYTHON_TESTS_LIST@
 export OPENCV_TEST_DATA_PATH=@CMAKE_INSTALL_PREFIX@/share/OpenCV/testdata

+CUR_DIR=`pwd`
+if [ -d "$CUR_DIR" -a -w "$CUR_DIR" ]; then
+    echo "${TEXT_CYAN}CUR_DIR : $CUR_DIR${TEXT_RESET}"
+else
+    echo "${TEXT_RED}Error: Do not have permissions to write to $CUR_DIR${TEXT_RESET}"
+    echo "${TEXT_RED}Please run the script from directory with write access${TEXT_RESET}"
+    exit 1
+fi
+
 # Run tests

 SUMMARY_STATUS=0
@ -64,9 +73,8 @@ PASSED_TESTS=""
 for t in "$OPENCV_TEST_PATH/"opencv_test_* "$OPENCV_TEST_PATH/"opencv_perf_*;
 do
    test_name=`basename "$t"`
-    report="$test_name-`date --rfc-3339=date`.xml"

-    cmd="$t --perf_min_samples=1 --perf_force_samples=1 --gtest_output=xml:\"$report\""
+    cmd="$t --perf_min_samples=1 --perf_force_samples=1 --gtest_output=xml:$test_name.xml"

    seg_reg="s/^/${TEXT_CYAN}[$test_name]${TEXT_RESET} /"                     # append test name
    if [ $COLOR_OUTPUT -eq 1 ]; then
@ -79,7 +87,7 @@ do
    fi

    echo "${TEXT_CYAN}[$test_name]${TEXT_RESET} RUN : $cmd"
-    $cmd | sed -r "$seg_reg"
+    eval "$cmd" | tee "$test_name.log" | sed -r "$seg_reg"
    ret=${PIPESTATUS[0]}
    echo "${TEXT_CYAN}[$test_name]${TEXT_RESET} RETURN_CODE : $ret"

@ -98,14 +106,13 @@ done
 for t in $OPENCV_PYTHON_TESTS;
 do
    test_name=`basename "$t"`
-    report="$test_name-`date --rfc-3339=date`.xml"

-    cmd="py.test --junitxml $report \"$OPENCV_TEST_PATH\"/$t"
+    cmd="py.test --junitxml $test_name.xml \"$OPENCV_TEST_PATH\"/$t"

    seg_reg="s/^/${TEXT_CYAN}[$test_name]${TEXT_RESET} /"                 # append test name

    echo "${TEXT_CYAN}[$test_name]${TEXT_RESET} RUN : $cmd"
-    eval "$cmd" | sed -r "$seg_reg"
+    eval "$cmd" | tee "$test_name.log" | sed -r "$seg_reg"

    ret=${PIPESTATUS[0]}
    echo "${TEXT_CYAN}[$test_name]${TEXT_RESET} RETURN_CODE : $ret"
--- a/doc/py_tutorials/py_feature2d/py_brief/py_brief.markdown
+++ b/doc/py_tutorials/py_feature2d/py_brief/py_brief.markdown
@ -48,6 +48,8 @@ BRIEF in OpenCV

 Below code shows the computation of BRIEF descriptors with the help of CenSurE detector. (CenSurE
 detector is called STAR detector in OpenCV)
+
+Note that you need [opencv contrib](https://github.com/Itseez/opencv_contrib) to use this.
@code{.py}
 import numpy as np
 import cv2
@ -55,11 +57,11 @@ from matplotlib import pyplot as plt

 img = cv2.imread('simple.jpg',0)

-# Initiate STAR detector
-star = cv2.FeatureDetector_create("STAR")
+# Initiate STAR detector
+star = cv2.xfeatures2d.StarDetector_create()

 # Initiate BRIEF extractor
-brief = cv2.DescriptorExtractor_create("BRIEF")
+brief = cv2.BriefDescriptorExtractor_create()

 # find the keypoints with STAR
 kp = star.detect(img,None)
--- a/doc/py_tutorials/py_feature2d/py_fast/py_fast.markdown
+++ b/doc/py_tutorials/py_feature2d/py_fast/py_fast.markdown
@ -101,7 +101,7 @@ from matplotlib import pyplot as plt
 img = cv2.imread('simple.jpg',0)

 # Initiate FAST object with default values
-fast = cv2.FastFeatureDetector()
+fast = cv2.FastFeatureDetector_create()

 # find and draw the keypoints
 kp = fast.detect(img,None)
--- a/doc/py_tutorials/py_feature2d/py_feature_homography/py_feature_homography.markdown
+++ b/doc/py_tutorials/py_feature2d/py_feature_homography/py_feature_homography.markdown
@ -44,7 +44,7 @@ img1 = cv2.imread('box.png',0)          # queryImage
 img2 = cv2.imread('box_in_scene.png',0) # trainImage

 # Initiate SIFT detector
-sift = cv2.SIFT()
+sift = cv2.xfeatures2d.SIFT_create()

 # find the keypoints and descriptors with SIFT
 kp1, des1 = sift.detectAndCompute(img1,None)
@ -78,7 +78,7 @@ if len(good)>MIN_MATCH_COUNT:
    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC,5.0)
    matchesMask = mask.ravel().tolist()

-    h,w = img1.shape
+    h,w,d = img1.shape
    pts = np.float32([ [0,0],[0,h-1],[w-1,h-1],[w-1,0] ]).reshape(-1,1,2)
    dst = cv2.perspectiveTransform(pts,M)

--- a/doc/py_tutorials/py_feature2d/py_features_harris/py_features_harris.markdown
+++ b/doc/py_tutorials/py_feature2d/py_features_harris/py_features_harris.markdown
@ -77,7 +77,7 @@ See the example below:
 import cv2
 import numpy as np

-filename = 'chessboard.jpg'
+filename = 'chessboard.png'
 img = cv2.imread(filename)
 gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)

--- a/doc/py_tutorials/py_feature2d/py_orb/py_orb.markdown
+++ b/doc/py_tutorials/py_feature2d/py_orb/py_orb.markdown
@ -69,8 +69,8 @@ from matplotlib import pyplot as plt

 img = cv2.imread('simple.jpg',0)

-# Initiate STAR detector
-orb = cv2.ORB()
+# Initiate ORB detector
+orb = cv2.ORB_create()

 # find the keypoints with ORB
 kp = orb.detect(img,None)
--- a/doc/py_tutorials/py_feature2d/py_shi_tomasi/py_shi_tomasi.markdown
+++ b/doc/py_tutorials/py_feature2d/py_shi_tomasi/py_shi_tomasi.markdown
@ -50,7 +50,7 @@ import numpy as np
 import cv2
 from matplotlib import pyplot as plt

-img = cv2.imread('simple.jpg')
+img = cv2.imread('blox.jpg')
 gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)

 corners = cv2.goodFeaturesToTrack(gray,25,0.01,10)
--- a/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown
+++ b/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown
@ -104,7 +104,7 @@ greater than 0.8, they are rejected. It eliminaters around 90% of false matches

 So this is a summary of SIFT algorithm. For more details and understanding, reading the original
 paper is highly recommended. Remember one thing, this algorithm is patented. So this algorithm is
-included in Non-free module in OpenCV.
+included in [the opencv contrib repo](https://github.com/Itseez/opencv_contrib).

 SIFT in OpenCV
 --------------
@ -119,7 +119,7 @@ import numpy as np
 img = cv2.imread('home.jpg')
 gray= cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)

-sift = cv2.SIFT()
+sift = cv2.xfeatures2d.SIFT_create()
 kp = sift.detect(gray,None)

 img=cv2.drawKeypoints(gray,kp)
@ -151,7 +151,7 @@ Now to calculate the descriptor, OpenCV provides two methods.

 We will see the second method:
@code{.py}
-sift = cv2.SIFT()
+sift = cv2.xfeatures2d.SIFT_create()
 kp, des = sift.detectAndCompute(gray,None)
@endcode
 Here kp will be a list of keypoints and des is a numpy array of shape
--- a/doc/py_tutorials/py_feature2d/py_surf_intro/py_surf_intro.markdown
+++ b/doc/py_tutorials/py_feature2d/py_surf_intro/py_surf_intro.markdown
@ -80,7 +80,7 @@ examples are shown in Python terminal since it is just same as SIFT only.

 # Create SURF object. You can specify params here or later.
 # Here I set Hessian Threshold to 400
->>> surf = cv2.SURF(400)
+>>> surf = cv2.xfeatures2d.SURF_create(400)

 # Find keypoints and descriptors directly
 >>> kp, des = surf.detectAndCompute(img,None)
@ -92,12 +92,12 @@ examples are shown in Python terminal since it is just same as SIFT only.
 While matching, we may need all those features, but not now. So we increase the Hessian Threshold.
@code{.py}
 # Check present Hessian threshold
->>> print surf.hessianThreshold
+>>> print surf.getHessianThreshold()
 400.0

 # We set it to some 50000. Remember, it is just for representing in picture.
 # In actual cases, it is better to have a value 300-500
->>> surf.hessianThreshold = 50000
+>>> surf.setHessianThreshold(50000)

 # Again compute keypoints and check its number.
 >>> kp, des = surf.detectAndCompute(img,None)
@ -119,10 +119,10 @@ on wings of butterfly. You can test it with other images.
 Now I want to apply U-SURF, so that it won't find the orientation.
@code{.py}
 # Check upright flag, if it False, set it to True
->>> print surf.upright
+>>> print surf.getUpright()
 False

->>> surf.upright = True
+>>> surf.setUpright(True)

 # Recompute the feature points and draw it
 >>> kp = surf.detect(img,None)
@ -143,7 +143,7 @@ Finally we check the descriptor size and change it to 128 if it is only 64-dim.
 64

 # That means flag, "extended" is False.
->>> surf.extended
+>>> surf.getExtended()
 False

 # So we make it to True to get 128-dim descriptors.
--- a/doc/py_tutorials/py_gui/py_video_display/py_video_display.markdown
+++ b/doc/py_tutorials/py_gui/py_video_display/py_video_display.markdown
@ -113,7 +113,7 @@ platform dependent. Following codecs works fine for me.
 -   In OSX : *(I don't have access to OSX. Can some one fill this?)*

 FourCC code is passed as cv2.VideoWriter_fourcc('M','J','P','G') or
-cv2.VideoWriter_fourcc(\*'MJPG) for MJPG.
+cv2.VideoWriter_fourcc(\*'MJPG') for MJPG.

 Below code capture from a Camera, flip every frame in vertical direction and saves it.
@code{.py}
--- a/doc/tutorials/ml/introduction_to_svm/introduction_to_svm.markdown
+++ b/doc/tutorials/ml/introduction_to_svm/introduction_to_svm.markdown
@ -94,6 +94,8 @@ the weight vector \f$\beta\f$ and the bias \f$\beta_{0}\f$ of the optimal hyperp
 Source Code
 -----------

+@note The following code has been implemented with OpenCV 3.0 classes and functions. An equivalent version of the code using OpenCV 2.4 can be found on [this page](http://docs.opencv.org/2.4/doc/tutorials/ml/introduction_to_svm/introduction_to_svm.html#introductiontosvms).
+
@include cpp/tutorial_code/ml/introduction_to_svm/introduction_to_svm.cpp

 Explanation
--- a/doc/tutorials/ml/non_linear_svms/non_linear_svms.markdown
+++ b/doc/tutorials/ml/non_linear_svms/non_linear_svms.markdown
@ -89,6 +89,9 @@ Source Code
 You may also find the source code in `samples/cpp/tutorial_code/ml/non_linear_svms` folder of the OpenCV source library or
 [download it from here](https://github.com/Itseez/opencv/tree/master/samples/cpp/tutorial_code/ml/non_linear_svms/non_linear_svms.cpp).

+@note The following code has been implemented with OpenCV 3.0 classes and functions. An equivalent version of the code
+using OpenCV 2.4 can be found on [this page](http://docs.opencv.org/2.4/doc/tutorials/ml/non_linear_svms/non_linear_svms.html#nonlinearsvms).
+
@include cpp/tutorial_code/ml/non_linear_svms/non_linear_svms.cpp

 Explanation
--- a/modules/calib3d/src/calibinit.cpp
+++ b/modules/calib3d/src/calibinit.cpp
@ -154,7 +154,7 @@ struct CvCBQuad
 //static CvMat* debug_img = 0;

 static int icvGenerateQuads( CvCBQuad **quads, CvCBCorner **corners,
-                             CvMemStorage *storage, CvMat *image, int flags );
+                             CvMemStorage *storage, CvMat *image, int flags, int *max_quad_buf_size);

 /*static int
 icvGenerateQuadsEx( CvCBQuad **out_quads, CvCBCorner **out_corners,
@ -174,7 +174,7 @@ static int icvCleanFoundConnectedQuads( int quad_count,

 static int icvOrderFoundConnectedQuads( int quad_count, CvCBQuad **quads,
           int *all_count, CvCBQuad **all_quads, CvCBCorner **corners,
-           CvSize pattern_size, CvMemStorage* storage );
+           CvSize pattern_size, int max_quad_buf_size, CvMemStorage* storage );

 static void icvOrderQuad(CvCBQuad *quad, CvCBCorner *corner, int common);

@ -185,7 +185,7 @@ static int icvTrimRow(CvCBQuad **quads, int count, int row, int dir);
 #endif

 static int icvAddOuterQuad(CvCBQuad *quad, CvCBQuad **quads, int quad_count,
-                    CvCBQuad **all_quads, int all_count, CvCBCorner **corners);
+                    CvCBQuad **all_quads, int all_count, CvCBCorner **corners, int max_quad_buf_size);

 static void icvRemoveQuadFromGroup(CvCBQuad **quads, int count, CvCBQuad *q0);

@ -314,6 +314,7 @@ int cvFindChessboardCorners( const void* arr, CvSize pattern_size,
    // making it difficult to detect smaller squares.
    for( k = 0; k < 6; k++ )
    {
+        int max_quad_buf_size = 0;
        for( dilations = min_dilations; dilations <= max_dilations; dilations++ )
        {
            if (found)
@ -369,7 +370,7 @@ int cvFindChessboardCorners( const void* arr, CvSize pattern_size,
                cvRectangle( thresh_img, cvPoint(0,0), cvPoint(thresh_img->cols-1,
                    thresh_img->rows-1), CV_RGB(255,255,255), 3, 8);

-                quad_count = icvGenerateQuads( &quads, &corners, storage, thresh_img, flags );
+                quad_count = icvGenerateQuads( &quads, &corners, storage, thresh_img, flags, &max_quad_buf_size);

                PRINTF("Quad count: %d/%d\n", quad_count, expected_corners_num);
            }
@ -409,8 +410,8 @@ int cvFindChessboardCorners( const void* arr, CvSize pattern_size,
            // allocate extra for adding in icvOrderFoundQuads
            cvFree(&quad_group);
            cvFree(&corner_group);
-            quad_group = (CvCBQuad**)cvAlloc( sizeof(quad_group[0]) * (quad_count+quad_count / 2));
-            corner_group = (CvCBCorner**)cvAlloc( sizeof(corner_group[0]) * (quad_count+quad_count / 2)*4 );
+            quad_group = (CvCBQuad**)cvAlloc( sizeof(quad_group[0]) * max_quad_buf_size);
+            corner_group = (CvCBCorner**)cvAlloc( sizeof(corner_group[0]) * max_quad_buf_size * 4 );

            for( group_idx = 0; ; group_idx++ )
            {
@ -425,7 +426,7 @@ int cvFindChessboardCorners( const void* arr, CvSize pattern_size,
                // maybe delete or add some
                PRINTF("Starting ordering of inner quads\n");
                count = icvOrderFoundConnectedQuads(count, quad_group, &quad_count, &quads, &corners,
-                    pattern_size, storage );
+                    pattern_size, max_quad_buf_size, storage );
                PRINTF("Orig count: %d  After ordering: %d\n", icount, count);


@ -624,7 +625,7 @@ icvCheckBoardMonotony( CvPoint2D32f* corners, CvSize pattern_size )
 static int
 icvOrderFoundConnectedQuads( int quad_count, CvCBQuad **quads,
        int *all_count, CvCBQuad **all_quads, CvCBCorner **corners,
-        CvSize pattern_size, CvMemStorage* storage )
+        CvSize pattern_size, int max_quad_buf_size, CvMemStorage* storage )
 {
    cv::Ptr<CvMemStorage> temp_storage(cvCreateChildMemStorage( storage ));
    CvSeq* stack = cvCreateSeq( 0, sizeof(*stack), sizeof(void*), temp_storage );
@ -804,15 +805,18 @@ icvOrderFoundConnectedQuads( int quad_count, CvCBQuad **quads,
    if (found > 0)
    {
        PRINTF("Found %d inner quads not connected to outer quads, repairing\n", found);
-        for (int i=0; i<quad_count; i++)
+        for (int i=0; i<quad_count && *all_count < max_quad_buf_size; i++)
        {
            if (quads[i]->count < 4 && quads[i]->ordered)
            {
-                int added = icvAddOuterQuad(quads[i],quads,quad_count,all_quads,*all_count,corners);
+                int added = icvAddOuterQuad(quads[i],quads,quad_count,all_quads,*all_count,corners, max_quad_buf_size);
                *all_count += added;
                quad_count += added;
            }
        }
+
+        if (*all_count >= max_quad_buf_size)
+            return 0;
    }


@ -855,11 +859,11 @@ icvOrderFoundConnectedQuads( int quad_count, CvCBQuad **quads,

 static int
 icvAddOuterQuad( CvCBQuad *quad, CvCBQuad **quads, int quad_count,
-        CvCBQuad **all_quads, int all_count, CvCBCorner **corners )
+        CvCBQuad **all_quads, int all_count, CvCBCorner **corners, int max_quad_buf_size )

 {
    int added = 0;
-    for (int i=0; i<4; i++) // find no-neighbor corners
+    for (int i=0; i<4 && all_count < max_quad_buf_size; i++) // find no-neighbor corners
    {
        if (!quad->neighbors[i])    // ok, create and add neighbor
        {
@ -1649,7 +1653,7 @@ static void icvFindQuadNeighbors( CvCBQuad *quads, int quad_count )

 static int
 icvGenerateQuads( CvCBQuad **out_quads, CvCBCorner **out_corners,
-                  CvMemStorage *storage, CvMat *image, int flags )
+                  CvMemStorage *storage, CvMat *image, int flags, int *max_quad_buf_size )
 {
    int quad_count = 0;
    cv::Ptr<CvMemStorage> temp_storage;
@ -1754,8 +1758,9 @@ icvGenerateQuads( CvCBQuad **out_quads, CvCBCorner **out_corners,
    cvEndFindContours( &scanner );

    // allocate quad & corner buffers
-    *out_quads = (CvCBQuad*)cvAlloc((root->total+root->total / 2) * sizeof((*out_quads)[0]));
-    *out_corners = (CvCBCorner*)cvAlloc((root->total+root->total / 2) * 4 * sizeof((*out_corners)[0]));
+    *max_quad_buf_size = MAX(1, (root->total+root->total / 2)) * 2;
+    *out_quads = (CvCBQuad*)cvAlloc(*max_quad_buf_size * sizeof((*out_quads)[0]));
+    *out_corners = (CvCBCorner*)cvAlloc(*max_quad_buf_size * 4 * sizeof((*out_corners)[0]));

    // Create array of quads structures
    for( idx = 0; idx < root->total; idx++ )
--- a/modules/calib3d/src/fisheye.cpp
+++ b/modules/calib3d/src/fisheye.cpp
@ -512,7 +512,7 @@ void cv::fisheye::estimateNewCameraMatrixForUndistortRectify(InputArray K, Input
    OutputArray P, double balance, const Size& new_size, double fov_scale)
 {
    CV_Assert( K.size() == Size(3, 3)       && (K.depth() == CV_32F || K.depth() == CV_64F));
-    CV_Assert((D.empty() || D.total() == 4) && (D.depth() == CV_32F || D.depth() == CV_64F || D.empty()));
+    CV_Assert(D.empty() || ((D.total() == 4) && (D.depth() == CV_32F || D.depth() == CV_64F)));

    int w = image_size.width, h = image_size.height;
    balance = std::min(std::max(balance, 0.0), 1.0);
@ -694,12 +694,12 @@ double cv::fisheye::calibrate(InputArrayOfArrays objectPoints, InputArrayOfArray
    CV_Assert(!objectPoints.empty() && !imagePoints.empty() && objectPoints.total() == imagePoints.total());
    CV_Assert(objectPoints.type() == CV_32FC3 || objectPoints.type() == CV_64FC3);
    CV_Assert(imagePoints.type() == CV_32FC2 || imagePoints.type() == CV_64FC2);
-    CV_Assert((!K.empty() && K.size() == Size(3,3)) || K.empty());
-    CV_Assert((!D.empty() && D.total() == 4) || D.empty());
-    CV_Assert((!rvecs.empty() && rvecs.channels() == 3) || rvecs.empty());
-    CV_Assert((!tvecs.empty() && tvecs.channels() == 3) || tvecs.empty());
+    CV_Assert(K.empty() || (K.size() == Size(3,3)));
+    CV_Assert(D.empty() || (D.total() == 4));
+    CV_Assert(rvecs.empty() || (rvecs.channels() == 3));
+    CV_Assert(tvecs.empty() || (tvecs.channels() == 3));

-    CV_Assert(((flags & CALIB_USE_INTRINSIC_GUESS) && !K.empty() && !D.empty()) || !(flags & CALIB_USE_INTRINSIC_GUESS));
+    CV_Assert((!K.empty() && !D.empty()) || !(flags & CALIB_USE_INTRINSIC_GUESS));

    using namespace cv::internal;
    //-------------------------------Initialization
@ -825,12 +825,12 @@ double cv::fisheye::stereoCalibrate(InputArrayOfArrays objectPoints, InputArrayO
    CV_Assert(imagePoints1.type() == CV_32FC2 || imagePoints1.type() == CV_64FC2);
    CV_Assert(imagePoints2.type() == CV_32FC2 || imagePoints2.type() == CV_64FC2);

-    CV_Assert((!K1.empty() && K1.size() == Size(3,3)) || K1.empty());
-    CV_Assert((!D1.empty() && D1.total() == 4) || D1.empty());
-    CV_Assert((!K2.empty() && K1.size() == Size(3,3)) || K2.empty());
-    CV_Assert((!D2.empty() && D1.total() == 4) || D2.empty());
+    CV_Assert(K1.empty() || (K1.size() == Size(3,3))); // each optional intrinsic, when supplied, is checked against its own shape
+    CV_Assert(D1.empty() || (D1.total() == 4));
+    CV_Assert(K2.empty() || (K2.size() == Size(3,3)));
+    CV_Assert(D2.empty() || (D2.total() == 4));

-    CV_Assert(((flags & CALIB_FIX_INTRINSIC) && !K1.empty() && !K2.empty() && !D1.empty() && !D2.empty()) || !(flags & CALIB_FIX_INTRINSIC));
+    CV_Assert((!K1.empty() && !K2.empty() && !D1.empty() && !D2.empty()) || !(flags & CALIB_FIX_INTRINSIC));

    //-------------------------------Initialization

--- a/modules/calib3d/src/main.cpp
+++ b/modules/calib3d/src/main.cpp
@ -0,0 +1,52 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+//
+// Library initialization file
+//
+
+#include "precomp.hpp"
+
+IPP_INITIALIZER_AUTO
+
+/* End of file. */
--- a/modules/calib3d/src/stereobm.cpp
+++ b/modules/calib3d/src/stereobm.cpp
@ -99,7 +99,7 @@ static bool ocl_prefilter_norm(InputArray _input, OutputArray _output, int winsi
    _output.create(input.size(), input.type());
    output = _output.getUMat();

-    size_t globalThreads[3] = { input.cols, input.rows, 1 };
+    size_t globalThreads[3] = { (size_t)input.cols, (size_t)input.rows, 1 };

    k.args(ocl::KernelArg::PtrReadOnly(input), ocl::KernelArg::PtrWriteOnly(output), input.rows, input.cols,
        prefilterCap, scale_g, scale_s);
@ -180,7 +180,7 @@ static bool ocl_prefilter_xsobel(InputArray _input, OutputArray _output, int pre
    _output.create(input.size(), input.type());
    output = _output.getUMat();

-    size_t globalThreads[3] = { input.cols, input.rows, 1 };
+    size_t globalThreads[3] = { (size_t)input.cols, (size_t)input.rows, 1 };

    k.args(ocl::KernelArg::PtrReadOnly(input), ocl::KernelArg::PtrWriteOnly(output), input.rows, input.cols, prefilterCap);

@ -927,8 +927,8 @@ static bool ocl_stereobm( InputArray _left, InputArray _right,

    int globalX = (disp.cols + sizeX - 1) / sizeX,
        globalY = (disp.rows + sizeY - 1) / sizeY;
-    size_t globalThreads[3] = {N, globalX, globalY};
-    size_t localThreads[3]  = {N, 1, 1};
+    size_t globalThreads[3] = {(size_t)N, (size_t)globalX, (size_t)globalY};
+    size_t localThreads[3]  = {(size_t)N, 1, 1};

    int idx = 0;
    idx = k.set(idx, ocl::KernelArg::PtrReadOnly(left));
--- a/modules/calib3d/src/stereosgbm.cpp
+++ b/modules/calib3d/src/stereosgbm.cpp
@ -1688,6 +1688,45 @@ void filterSpecklesImpl(cv::Mat& img, int newVal, int maxSpeckleSize, int maxDif
    }
 }

+#ifdef HAVE_IPP
+static bool ipp_filterSpeckles(Mat &img, int maxSpeckleSize, int newVal, int maxDiff)
+{ // IPP-backed speckle marking on img; returns true only if the IPP call reported a non-negative status, false otherwise
+#if IPP_VERSION_X100 >= 810
+    int type = img.type();
+    Ipp32s bufsize = 0; // scratch-buffer size in bytes, filled in by ippiMarkSpecklesGetBufferSize below
+    IppiSize roisize = { img.cols, img.rows }; // the whole image is the region of interest
+    IppDataType datatype = type == CV_8UC1 ? ipp8u : ipp16s; // anything that is not CV_8UC1 is treated as 16-bit signed
+    Ipp8u *pBuffer = NULL;
+    IppStatus status = ippStsNoErr;
+
+    if(ippiMarkSpecklesGetBufferSize(roisize, datatype, CV_MAT_CN(type), &bufsize) < 0) // negative status: cannot size the scratch buffer
+        return false;
+
+    pBuffer = (Ipp8u*)ippMalloc(bufsize);
+    if(!pBuffer && bufsize) // allocation failed although a non-zero size was requested
+        return false;
+
+    if (type == CV_8UC1)
+    {
+        status = ippiMarkSpeckles_8u_C1IR(img.ptr<Ipp8u>(), (int)img.step, roisize, // img's own buffer is the only image pointer passed
+                                            (Ipp8u)newVal, maxSpeckleSize, (Ipp8u)maxDiff, ippiNormL1, pBuffer);
+    }
+    else
+    {
+        status = ippiMarkSpeckles_16s_C1IR(img.ptr<Ipp16s>(), (int)img.step, roisize, // 16-bit signed counterpart of the call above
+                                            (Ipp16s)newVal, maxSpeckleSize, (Ipp16s)maxDiff, ippiNormL1, pBuffer);
+    }
+    if(pBuffer) ippFree(pBuffer); // release the scratch buffer before inspecting the status
+
+    if (status >= 0)
+        return true;
+#else
+    CV_UNUSED(img); CV_UNUSED(maxSpeckleSize); CV_UNUSED(newVal); CV_UNUSED(maxDiff); // IPP older than 8.1.0: nothing is done with the arguments
+#endif
+    return false; // reached on a negative IPP status or when the IPP path is compiled out
+}
+#endif
+
 }

 void cv::filterSpeckles( InputOutputArray _img, double _newval, int maxSpeckleSize,
@ -1700,37 +1739,7 @@ void cv::filterSpeckles( InputOutputArray _img, double _newval, int maxSpeckleSi

    int newVal = cvRound(_newval), maxDiff = cvRound(_maxDiff);

-#if IPP_VERSION_X100 >= 801
-    CV_IPP_CHECK()
-    {
-        Ipp32s bufsize = 0;
-        IppiSize roisize = { img.cols, img.rows };
-        IppDataType datatype = type == CV_8UC1 ? ipp8u : ipp16s;
-
-        if (!__buf.needed() && (type == CV_8UC1 || type == CV_16SC1))
-        {
-            IppStatus status = ippiMarkSpecklesGetBufferSize(roisize, datatype, CV_MAT_CN(type), &bufsize);
-            Ipp8u * buffer = ippsMalloc_8u(bufsize);
-
-            if ((int)status >= 0)
-            {
-                if (type == CV_8UC1)
-                    status = ippiMarkSpeckles_8u_C1IR(img.ptr<Ipp8u>(), (int)img.step, roisize,
-                                                      (Ipp8u)newVal, maxSpeckleSize, (Ipp8u)maxDiff, ippiNormL1, buffer);
-                else
-                    status = ippiMarkSpeckles_16s_C1IR(img.ptr<Ipp16s>(), (int)img.step, roisize,
-                                                       (Ipp16s)newVal, maxSpeckleSize, (Ipp16s)maxDiff, ippiNormL1, buffer);
-            }
-
-            if (status >= 0)
-            {
-                CV_IMPL_ADD(CV_IMPL_IPP);
-                return;
-            }
-            setIppErrorStatus();
-        }
-    }
-#endif
+    CV_IPP_RUN(IPP_VERSION_X100 >= 810 && !__buf.needed() && (type == CV_8UC1 || type == CV_16SC1), ipp_filterSpeckles(img, maxSpeckleSize, newVal, maxDiff));

    if (type == CV_8UC1)
        filterSpecklesImpl<uchar>(img, newVal, maxSpeckleSize, maxDiff, _buf);
--- a/modules/calib3d/test/test_fisheye.cpp
+++ b/modules/calib3d/test/test_fisheye.cpp
@ -101,15 +101,15 @@ TEST_F(fisheyeTest, projectPoints)

 TEST_F(fisheyeTest, DISABLED_undistortImage)
 {
-    cv::Matx33d K = this->K;
-    cv::Mat D = cv::Mat(this->D);
+    cv::Matx33d theK = this->K;
+    cv::Mat theD = cv::Mat(this->D);
    std::string file = combine(datasets_repository_path, "/calib-3_stereo_from_JY/left/stereo_pair_014.jpg");
-    cv::Matx33d newK = K;
+    cv::Matx33d newK = theK;
    cv::Mat distorted = cv::imread(file), undistorted;
    {
        newK(0, 0) = 100;
        newK(1, 1) = 100;
-        cv::fisheye::undistortImage(distorted, undistorted, K, D, newK);
+        cv::fisheye::undistortImage(distorted, undistorted, theK, theD, newK);
        cv::Mat correct = cv::imread(combine(datasets_repository_path, "new_f_100.png"));
        if (correct.empty())
            CV_Assert(cv::imwrite(combine(datasets_repository_path, "new_f_100.png"), undistorted));
@ -118,8 +118,8 @@ TEST_F(fisheyeTest, DISABLED_undistortImage)
    }
    {
        double balance = 1.0;
-        cv::fisheye::estimateNewCameraMatrixForUndistortRectify(K, D, distorted.size(), cv::noArray(), newK, balance);
-        cv::fisheye::undistortImage(distorted, undistorted, K, D, newK);
+        cv::fisheye::estimateNewCameraMatrixForUndistortRectify(theK, theD, distorted.size(), cv::noArray(), newK, balance);
+        cv::fisheye::undistortImage(distorted, undistorted, theK, theD, newK);
        cv::Mat correct = cv::imread(combine(datasets_repository_path, "balance_1.0.png"));
        if (correct.empty())
            CV_Assert(cv::imwrite(combine(datasets_repository_path, "balance_1.0.png"), undistorted));
@ -129,8 +129,8 @@ TEST_F(fisheyeTest, DISABLED_undistortImage)

    {
        double balance = 0.0;
-        cv::fisheye::estimateNewCameraMatrixForUndistortRectify(K, D, distorted.size(), cv::noArray(), newK, balance);
-        cv::fisheye::undistortImage(distorted, undistorted, K, D, newK);
+        cv::fisheye::estimateNewCameraMatrixForUndistortRectify(theK, theD, distorted.size(), cv::noArray(), newK, balance);
+        cv::fisheye::undistortImage(distorted, undistorted, theK, theD, newK);
        cv::Mat correct = cv::imread(combine(datasets_repository_path, "balance_0.0.png"));
        if (correct.empty())
            CV_Assert(cv::imwrite(combine(datasets_repository_path, "balance_0.0.png"), undistorted));
@ -143,7 +143,7 @@ TEST_F(fisheyeTest, jacobians)
 {
    int n = 10;
    cv::Mat X(1, n, CV_64FC3);
-    cv::Mat om(3, 1, CV_64F), T(3, 1, CV_64F);
+    cv::Mat om(3, 1, CV_64F), theT(3, 1, CV_64F);
    cv::Mat f(2, 1, CV_64F), c(2, 1, CV_64F);
    cv::Mat k(4, 1, CV_64F);
    double alpha;
@ -156,8 +156,8 @@ TEST_F(fisheyeTest, jacobians)
    r.fill(om, cv::RNG::NORMAL, 0, 1);
    om = cv::abs(om);

-    r.fill(T, cv::RNG::NORMAL, 0, 1);
-    T = cv::abs(T); T.at<double>(2) = 4; T *= 10;
+    r.fill(theT, cv::RNG::NORMAL, 0, 1);
+    theT = cv::abs(theT); theT.at<double>(2) = 4; theT *= 10;

    r.fill(f, cv::RNG::NORMAL, 0, 1);
    f = cv::abs(f) * 1000;
@ -171,19 +171,19 @@ TEST_F(fisheyeTest, jacobians)
    alpha = 0.01*r.gaussian(1);

    cv::Mat x1, x2, xpred;
-    cv::Matx33d K(f.at<double>(0), alpha * f.at<double>(0), c.at<double>(0),
+    cv::Matx33d theK(f.at<double>(0), alpha * f.at<double>(0), c.at<double>(0),
                     0,            f.at<double>(1), c.at<double>(1),
                     0,            0,    1);

    cv::Mat jacobians;
-    cv::fisheye::projectPoints(X, x1, om, T, K, k, alpha, jacobians);
+    cv::fisheye::projectPoints(X, x1, om, theT, theK, k, alpha, jacobians);

    //test on T:
    cv::Mat dT(3, 1, CV_64FC1);
    r.fill(dT, cv::RNG::NORMAL, 0, 1);
-    dT *= 1e-9*cv::norm(T);
-    cv::Mat T2 = T + dT;
-    cv::fisheye::projectPoints(X, x2, om, T2, K, k, alpha, cv::noArray());
+    dT *= 1e-9*cv::norm(theT);
+    cv::Mat T2 = theT + dT;
+    cv::fisheye::projectPoints(X, x2, om, T2, theK, k, alpha, cv::noArray());
    xpred = x1 + cv::Mat(jacobians.colRange(11,14) * dT).reshape(2, 1);
    CV_Assert (cv::norm(x2 - xpred) < 1e-10);

@ -192,7 +192,7 @@ TEST_F(fisheyeTest, jacobians)
    r.fill(dom, cv::RNG::NORMAL, 0, 1);
    dom *= 1e-9*cv::norm(om);
    cv::Mat om2 = om + dom;
-    cv::fisheye::projectPoints(X, x2, om2, T, K, k, alpha, cv::noArray());
+    cv::fisheye::projectPoints(X, x2, om2, theT, theK, k, alpha, cv::noArray());
    xpred = x1 + cv::Mat(jacobians.colRange(8,11) * dom).reshape(2, 1);
    CV_Assert (cv::norm(x2 - xpred) < 1e-10);

@ -200,8 +200,8 @@ TEST_F(fisheyeTest, jacobians)
    cv::Mat df(2, 1, CV_64FC1);
    r.fill(df, cv::RNG::NORMAL, 0, 1);
    df *= 1e-9*cv::norm(f);
-    cv::Matx33d K2 = K + cv::Matx33d(df.at<double>(0), df.at<double>(0) * alpha, 0, 0, df.at<double>(1), 0, 0, 0, 0);
-    cv::fisheye::projectPoints(X, x2, om, T, K2, k, alpha, cv::noArray());
+    cv::Matx33d K2 = theK + cv::Matx33d(df.at<double>(0), df.at<double>(0) * alpha, 0, 0, df.at<double>(1), 0, 0, 0, 0);
+    cv::fisheye::projectPoints(X, x2, om, theT, K2, k, alpha, cv::noArray());
    xpred = x1 + cv::Mat(jacobians.colRange(0,2) * df).reshape(2, 1);
    CV_Assert (cv::norm(x2 - xpred) < 1e-10);

@ -209,8 +209,8 @@ TEST_F(fisheyeTest, jacobians)
    cv::Mat dc(2, 1, CV_64FC1);
    r.fill(dc, cv::RNG::NORMAL, 0, 1);
    dc *= 1e-9*cv::norm(c);
-    K2 = K + cv::Matx33d(0, 0, dc.at<double>(0), 0, 0, dc.at<double>(1), 0, 0, 0);
-    cv::fisheye::projectPoints(X, x2, om, T, K2, k, alpha, cv::noArray());
+    K2 = theK + cv::Matx33d(0, 0, dc.at<double>(0), 0, 0, dc.at<double>(1), 0, 0, 0);
+    cv::fisheye::projectPoints(X, x2, om, theT, K2, k, alpha, cv::noArray());
    xpred = x1 + cv::Mat(jacobians.colRange(2,4) * dc).reshape(2, 1);
    CV_Assert (cv::norm(x2 - xpred) < 1e-10);

@ -219,7 +219,7 @@ TEST_F(fisheyeTest, jacobians)
    r.fill(dk, cv::RNG::NORMAL, 0, 1);
    dk *= 1e-9*cv::norm(k);
    cv::Mat k2 = k + dk;
-    cv::fisheye::projectPoints(X, x2, om, T, K, k2, alpha, cv::noArray());
+    cv::fisheye::projectPoints(X, x2, om, theT, theK, k2, alpha, cv::noArray());
    xpred = x1 + cv::Mat(jacobians.colRange(4,8) * dk).reshape(2, 1);
    CV_Assert (cv::norm(x2 - xpred) < 1e-10);

@ -228,8 +228,8 @@ TEST_F(fisheyeTest, jacobians)
    r.fill(dalpha, cv::RNG::NORMAL, 0, 1);
    dalpha *= 1e-9*cv::norm(f);
    double alpha2 = alpha + dalpha.at<double>(0);
-    K2 = K + cv::Matx33d(0, f.at<double>(0) * dalpha.at<double>(0), 0, 0, 0, 0, 0, 0, 0);
-    cv::fisheye::projectPoints(X, x2, om, T, K, k, alpha2, cv::noArray());
+    K2 = theK + cv::Matx33d(0, f.at<double>(0) * dalpha.at<double>(0), 0, 0, 0, 0, 0, 0, 0);
+    cv::fisheye::projectPoints(X, x2, om, theT, theK, k, alpha2, cv::noArray());
    xpred = x1 + cv::Mat(jacobians.col(14) * dalpha).reshape(2, 1);
    CV_Assert (cv::norm(x2 - xpred) < 1e-10);
 }
@ -259,14 +259,14 @@ TEST_F(fisheyeTest, Calibration)
    flag |= cv::fisheye::CALIB_CHECK_COND;
    flag |= cv::fisheye::CALIB_FIX_SKEW;

-    cv::Matx33d K;
-    cv::Vec4d D;
+    cv::Matx33d theK;
+    cv::Vec4d theD;

-    cv::fisheye::calibrate(objectPoints, imagePoints, imageSize, K, D,
+    cv::fisheye::calibrate(objectPoints, imagePoints, imageSize, theK, theD,
                           cv::noArray(), cv::noArray(), flag, cv::TermCriteria(3, 20, 1e-6));

-    EXPECT_MAT_NEAR(K, this->K, 1e-10);
-    EXPECT_MAT_NEAR(D, this->D, 1e-10);
+    EXPECT_MAT_NEAR(theK, this->K, 1e-10);
+    EXPECT_MAT_NEAR(theD, this->D, 1e-10);
 }

 TEST_F(fisheyeTest, Homography)
@ -303,15 +303,15 @@ TEST_F(fisheyeTest, Homography)
    int Np = imagePointsNormalized.cols;
    cv::calcCovarMatrix(_objectPoints, covObjectPoints, objectPointsMean, cv::COVAR_NORMAL | cv::COVAR_COLS);
    cv::SVD svd(covObjectPoints);
-    cv::Mat R(svd.vt);
+    cv::Mat theR(svd.vt);

-    if (cv::norm(R(cv::Rect(2, 0, 1, 2))) < 1e-6)
-        R = cv::Mat::eye(3,3, CV_64FC1);
-    if (cv::determinant(R) < 0)
-        R = -R;
+    if (cv::norm(theR(cv::Rect(2, 0, 1, 2))) < 1e-6)
+        theR = cv::Mat::eye(3,3, CV_64FC1);
+    if (cv::determinant(theR) < 0)
+        theR = -theR;

-    cv::Mat T = -R * objectPointsMean;
-    cv::Mat X_new = R * _objectPoints + T * cv::Mat::ones(1, Np, CV_64FC1);
+    cv::Mat theT = -theR * objectPointsMean;
+    cv::Mat X_new = theR * _objectPoints + theT * cv::Mat::ones(1, Np, CV_64FC1);
    cv::Mat H = cv::internal::ComputeHomography(imagePointsNormalized, X_new.rowRange(0, 2));

    cv::Mat M = cv::Mat::ones(3, X_new.cols, CV_64FC1);
@ -355,19 +355,19 @@ TEST_F(fisheyeTest, EtimateUncertainties)
    flag |= cv::fisheye::CALIB_CHECK_COND;
    flag |= cv::fisheye::CALIB_FIX_SKEW;

-    cv::Matx33d K;
-    cv::Vec4d D;
+    cv::Matx33d theK;
+    cv::Vec4d theD;
    std::vector<cv::Vec3d> rvec;
    std::vector<cv::Vec3d> tvec;

-    cv::fisheye::calibrate(objectPoints, imagePoints, imageSize, K, D,
+    cv::fisheye::calibrate(objectPoints, imagePoints, imageSize, theK, theD,
                           rvec, tvec, flag, cv::TermCriteria(3, 20, 1e-6));

    cv::internal::IntrinsicParams param, errors;
    cv::Vec2d err_std;
    double thresh_cond = 1e6;
    int check_cond = 1;
-    param.Init(cv::Vec2d(K(0,0), K(1,1)), cv::Vec2d(K(0,2), K(1, 2)), D);
+    param.Init(cv::Vec2d(theK(0,0), theK(1,1)), cv::Vec2d(theK(0,2), theK(1, 2)), theD);
    param.isEstimate = std::vector<int>(9, 1);
    param.isEstimate[4] = 0;

@ -399,12 +399,12 @@ TEST_F(fisheyeTest, rectify)
    cv::Matx33d K1 = this->K, K2 = K1;
    cv::Mat D1 = cv::Mat(this->D), D2 = D1;

-    cv::Vec3d T = this->T;
-    cv::Matx33d R = this->R;
+    cv::Vec3d theT = this->T;
+    cv::Matx33d theR = this->R;

    double balance = 0.0, fov_scale = 1.1;
    cv::Mat R1, R2, P1, P2, Q;
-    cv::fisheye::stereoRectify(K1, D1, K2, D2, calibration_size, R, T, R1, R2, P1, P2, Q,
+    cv::fisheye::stereoRectify(K1, D1, K2, D2, calibration_size, theR, theT, R1, R2, P1, P2, Q,
                      cv::CALIB_ZERO_DISPARITY, requested_size, balance, fov_scale);

    cv::Mat lmapx, lmapy, rmapx, rmapy;
@ -468,8 +468,8 @@ TEST_F(fisheyeTest, stereoCalibrate)
    fs_object[cv::format("image_%d", i )] >> objectPoints[i];
    fs_object.release();

-    cv::Matx33d K1, K2, R;
-    cv::Vec3d T;
+    cv::Matx33d K1, K2, theR;
+    cv::Vec3d theT;
    cv::Vec4d D1, D2;

    int flag = 0;
@ -479,7 +479,7 @@ TEST_F(fisheyeTest, stereoCalibrate)
   // flag |= cv::fisheye::CALIB_FIX_INTRINSIC;

    cv::fisheye::stereoCalibrate(objectPoints, leftPoints, rightPoints,
-                    K1, D1, K2, D2, imageSize, R, T, flag,
+                    K1, D1, K2, D2, imageSize, theR, theT, flag,
                    cv::TermCriteria(3, 12, 0));

    cv::Matx33d R_correct(   0.9975587205950972,   0.06953016383322372, 0.006492709911733523,
@ -497,8 +497,8 @@ TEST_F(fisheyeTest, stereoCalibrate)
    cv::Vec4d D1_correct (-7.44253716539556e-05, -0.00702662033932424, 0.00737569823650885, -0.00342230256441771);
    cv::Vec4d D2_correct (-0.0130785435677431, 0.0284434505383497, -0.0360333869900506, 0.0144724062347222);

-    EXPECT_MAT_NEAR(R, R_correct, 1e-10);
-    EXPECT_MAT_NEAR(T, T_correct, 1e-10);
+    EXPECT_MAT_NEAR(theR, R_correct, 1e-10);
+    EXPECT_MAT_NEAR(theT, T_correct, 1e-10);

    EXPECT_MAT_NEAR(K1, K1_correct, 1e-10);
    EXPECT_MAT_NEAR(K2, K2_correct, 1e-10);
@ -536,8 +536,8 @@ TEST_F(fisheyeTest, stereoCalibrateFixIntrinsic)
    fs_object[cv::format("image_%d", i )] >> objectPoints[i];
    fs_object.release();

-    cv::Matx33d R;
-    cv::Vec3d T;
+    cv::Matx33d theR;
+    cv::Vec3d theT;

    int flag = 0;
    flag |= cv::fisheye::CALIB_RECOMPUTE_EXTRINSIC;
@ -557,7 +557,7 @@ TEST_F(fisheyeTest, stereoCalibrateFixIntrinsic)
    cv::Vec4d D2 (-0.0130785435677431, 0.0284434505383497, -0.0360333869900506, 0.0144724062347222);

    cv::fisheye::stereoCalibrate(objectPoints, leftPoints, rightPoints,
-                    K1, D1, K2, D2, imageSize, R, T, flag,
+                    K1, D1, K2, D2, imageSize, theR, theT, flag,
                    cv::TermCriteria(3, 12, 0));

    cv::Matx33d R_correct(   0.9975587205950972,   0.06953016383322372, 0.006492709911733523,
@ -566,8 +566,8 @@ TEST_F(fisheyeTest, stereoCalibrateFixIntrinsic)
    cv::Vec3d T_correct(-0.099402724724121, 0.00270812139265413, 0.00129330292472699);


-    EXPECT_MAT_NEAR(R, R_correct, 1e-10);
-    EXPECT_MAT_NEAR(T, T_correct, 1e-10);
+    EXPECT_MAT_NEAR(theR, R_correct, 1e-10);
+    EXPECT_MAT_NEAR(theT, T_correct, 1e-10);
 }

 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
--- a/modules/core/CMakeLists.txt
+++ b/modules/core/CMakeLists.txt
@ -1,7 +1,7 @@
 set(the_description "The Core Functionality")
 ocv_add_module(core
               opencv_hal
-               PRIVATE_REQUIRED ${ZLIB_LIBRARIES} "${OPENCL_LIBRARIES}"
+               PRIVATE_REQUIRED ${ZLIB_LIBRARIES} "${OPENCL_LIBRARIES}" "${VA_LIBRARIES}"
               OPTIONAL opencv_cudev
               WRAP java python)

--- a/modules/core/include/opencv2/core.hpp
+++ b/modules/core/include/opencv2/core.hpp
@ -2381,8 +2381,7 @@ class CV_EXPORTS LDA
 {
 public:
    /** @brief constructor
-    Initializes a LDA with num_components (default 0) and specifies how
-    samples are aligned (default dataAsRow=true).
+    Initializes a LDA with num_components (default 0).
    */
    explicit LDA(int num_components = 0);

@ -2413,15 +2412,17 @@ public:
      */
    ~LDA();

-    /** Compute the discriminants for data in src and labels.
+    /** Compute the discriminants for data in src (row aligned) and labels.
      */
    void compute(InputArrayOfArrays src, InputArray labels);

    /** Projects samples into the LDA subspace.
+        src may be one or more row aligned samples.
      */
    Mat project(InputArray src);

    /** Reconstructs projections from the LDA subspace.
+        src may be one or more row aligned projections.
      */
    Mat reconstruct(InputArray src);

@ -2437,11 +2438,10 @@ public:
    static Mat subspaceReconstruct(InputArray W, InputArray mean, InputArray src);

 protected:
-    bool _dataAsRow;
+    bool _dataAsRow; // unused, but needed for 3.0 ABI compatibility.
    int _num_components;
    Mat _eigenvectors;
    Mat _eigenvalues;
-
    void lda(InputArrayOfArrays src, InputArray labels);
 };

--- a/modules/core/include/opencv2/core/base.hpp
+++ b/modules/core/include/opencv2/core/base.hpp
@ -645,6 +645,7 @@ namespace cudev

 namespace ipp
 {
+CV_EXPORTS int getIppFeatures();
 CV_EXPORTS void setIppStatus(int status, const char * const funcname = NULL, const char * const filename = NULL,
                             int line = 0);
 CV_EXPORTS int getIppStatus();
--- a/modules/core/include/opencv2/core/mat.hpp
+++ b/modules/core/include/opencv2/core/mat.hpp
@ -496,6 +496,8 @@ struct CV_EXPORTS UMatData
    void* handle;
    void* userdata;
    int allocatorFlags_;
+    int mapcount;
+    UMatData* originalUMatData;
 };


@ -1071,6 +1073,7 @@ public:
    @param m Destination matrix. If it does not have a proper size or type before the operation, it is
    reallocated.
    @param mask Operation mask. Its non-zero elements indicate which matrix elements need to be copied.
+    The mask has to be of type CV_8U and can have 1 or multiple channels.
    */
    void copyTo( OutputArray m, InputArray mask ) const;

--- a/modules/core/include/opencv2/core/matx.hpp
+++ b/modules/core/include/opencv2/core/matx.hpp
@ -494,7 +494,7 @@ Matx<_Tp, m, n>::Matx(_Tp v0)
 template<typename _Tp, int m, int n> inline
 Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1)
 {
-    CV_StaticAssert(channels >= 2, "Matx should have at least 2 elaments.");
+    CV_StaticAssert(channels >= 2, "Matx should have at least 2 elements.");
    val[0] = v0; val[1] = v1;
    for(int i = 2; i < channels; i++) val[i] = _Tp(0);
 }
@ -502,7 +502,7 @@ Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1)
 template<typename _Tp, int m, int n> inline
 Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2)
 {
-    CV_StaticAssert(channels >= 3, "Matx should have at least 3 elaments.");
+    CV_StaticAssert(channels >= 3, "Matx should have at least 3 elements.");
    val[0] = v0; val[1] = v1; val[2] = v2;
    for(int i = 3; i < channels; i++) val[i] = _Tp(0);
 }
@ -510,7 +510,7 @@ Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2)
 template<typename _Tp, int m, int n> inline
 Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3)
 {
-    CV_StaticAssert(channels >= 4, "Matx should have at least 4 elaments.");
+    CV_StaticAssert(channels >= 4, "Matx should have at least 4 elements.");
    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
    for(int i = 4; i < channels; i++) val[i] = _Tp(0);
 }
@ -518,7 +518,7 @@ Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3)
 template<typename _Tp, int m, int n> inline
 Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4)
 {
-    CV_StaticAssert(channels >= 5, "Matx should have at least 5 elaments.");
+    CV_StaticAssert(channels >= 5, "Matx should have at least 5 elements.");
    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; val[4] = v4;
    for(int i = 5; i < channels; i++) val[i] = _Tp(0);
 }
@ -526,7 +526,7 @@ Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4)
 template<typename _Tp, int m, int n> inline
 Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5)
 {
-    CV_StaticAssert(channels >= 6, "Matx should have at least 6 elaments.");
+    CV_StaticAssert(channels >= 6, "Matx should have at least 6 elements.");
    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
    val[4] = v4; val[5] = v5;
    for(int i = 6; i < channels; i++) val[i] = _Tp(0);
@ -535,7 +535,7 @@ Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5)
 template<typename _Tp, int m, int n> inline
 Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6)
 {
-    CV_StaticAssert(channels >= 7, "Matx should have at least 7 elaments.");
+    CV_StaticAssert(channels >= 7, "Matx should have at least 7 elements.");
    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
    val[4] = v4; val[5] = v5; val[6] = v6;
    for(int i = 7; i < channels; i++) val[i] = _Tp(0);
@ -544,7 +544,7 @@ Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6)
 template<typename _Tp, int m, int n> inline
 Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7)
 {
-    CV_StaticAssert(channels >= 8, "Matx should have at least 8 elaments.");
+    CV_StaticAssert(channels >= 8, "Matx should have at least 8 elements.");
    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
    val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
    for(int i = 8; i < channels; i++) val[i] = _Tp(0);
@ -553,7 +553,7 @@ Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _T
 template<typename _Tp, int m, int n> inline
 Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8)
 {
-    CV_StaticAssert(channels >= 9, "Matx should have at least 9 elaments.");
+    CV_StaticAssert(channels >= 9, "Matx should have at least 9 elements.");
    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
    val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
    val[8] = v8;
@ -563,7 +563,7 @@ Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _T
 template<typename _Tp, int m, int n> inline
 Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9)
 {
-    CV_StaticAssert(channels >= 10, "Matx should have at least 10 elaments.");
+    CV_StaticAssert(channels >= 10, "Matx should have at least 10 elements.");
    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
    val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
    val[8] = v8; val[9] = v9;
@ -574,20 +574,22 @@ Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _T
 template<typename _Tp, int m, int n> inline
 Matx<_Tp,m,n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11)
 {
-    CV_StaticAssert(channels == 12, "Matx should have at least 12 elaments.");
+    CV_StaticAssert(channels >= 12, "Matx should have at least 12 elements.");
    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
    val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
    val[8] = v8; val[9] = v9; val[10] = v10; val[11] = v11;
+    for(int i = 12; i < channels; i++) val[i] = _Tp(0);
 }

 template<typename _Tp, int m, int n> inline
 Matx<_Tp,m,n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13, _Tp v14, _Tp v15)
 {
-    CV_StaticAssert(channels == 16, "Matx should have at least 16 elaments.");
+    CV_StaticAssert(channels >= 16, "Matx should have at least 16 elements.");
    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
    val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
    val[8] = v8; val[9] = v9; val[10] = v10; val[11] = v11;
    val[12] = v12; val[13] = v13; val[14] = v14; val[15] = v15;
+    for(int i = 16; i < channels; i++) val[i] = _Tp(0);
 }

 template<typename _Tp, int m, int n> inline
--- a/modules/core/include/opencv2/core/private.hpp
+++ b/modules/core/include/opencv2/core/private.hpp
@ -191,9 +191,16 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un
 \****************************************************************************************/

 #ifdef HAVE_IPP
-#  include "ipp.h"
+#include "ipp.h"

-#  define IPP_VERSION_X100 (IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR)
+#ifndef IPP_VERSION_UPDATE // prior to 7.1
+#define IPP_VERSION_UPDATE 0
+#endif
+
+#define IPP_VERSION_X100 (IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR*10 + IPP_VERSION_UPDATE)
+
+// General define for ipp function disabling
+#define IPP_DISABLE_BLOCK 0

 #ifdef CV_MALLOC_ALIGN
 #undef CV_MALLOC_ALIGN
@ -234,8 +241,33 @@ static inline IppDataType ippiGetDataType(int depth)
        depth == CV_64F ? ipp64f : (IppDataType)-1;
 }

+// IPP temporary buffer helper: owns one ippMalloc'ed buffer and releases it with ippFree on destruction
+template<typename T>
+class IppAutoBuffer
+{
+public:
+    IppAutoBuffer() : m_pBuffer(NULL) {}
+    IppAutoBuffer(int size) : m_pBuffer(NULL) { Alloc(size); } // size is forwarded to ippMalloc unchanged
+    ~IppAutoBuffer() { Release(); }
+    T* Alloc(int size) { Release(); m_pBuffer = (T*)ippMalloc(size); return m_pBuffer; } // frees a previously held buffer first
+    void Release() { if(m_pBuffer) ippFree(m_pBuffer); m_pBuffer = NULL; } // reset so a later Release()/destructor cannot free twice
+    inline operator T* () { return m_pBuffer; }
+    inline operator const T* () const { return m_pBuffer; }
+private:
+    // Disable copy operations (declared, never defined): two owners of one buffer would free it twice
+    IppAutoBuffer(const IppAutoBuffer &);
+    IppAutoBuffer& operator =(const IppAutoBuffer &);
+
+    T* m_pBuffer;
+};
+
 #else
-#  define IPP_VERSION_X100 0
+#define IPP_VERSION_X100 0
+#endif
+
+// There should be no API difference in OpenCV between ICV and IPP since 9.0
+#if (defined HAVE_IPP_ICV_ONLY) && IPP_VERSION_X100 >= 900
+#undef HAVE_IPP_ICV_ONLY
 #endif

 #ifdef HAVE_IPP_ICV_ONLY
@ -244,6 +276,42 @@ static inline IppDataType ippiGetDataType(int depth)
 #define HAVE_ICV 0
 #endif

+#if defined HAVE_IPP
+#if IPP_VERSION_X100 >= 900
+#define IPP_INITIALIZER(FEAT)                           \
+{                                                       \
+    if(FEAT)                                            \
+        ippSetCpuFeatures(FEAT);                        \
+    else                                                \
+        ippInit();                                      \
+}
+#elif IPP_VERSION_X100 >= 800
+#define IPP_INITIALIZER(FEAT)                           \
+{                                                       \
+    ippInit();                                          \
+}
+#else
+#define IPP_INITIALIZER(FEAT)                           \
+{                                                       \
+    ippStaticInit();                                    \
+}
+#endif
+
+#ifdef CVAPI_EXPORTS
+#define IPP_INITIALIZER_AUTO                            \
+struct __IppInitializer__                               \
+{                                                       \
+    __IppInitializer__()                                \
+    {IPP_INITIALIZER(cv::ipp::getIppFeatures())}        \
+};                                                      \
+static struct __IppInitializer__ __ipp_initializer__;
+#else
+#define IPP_INITIALIZER_AUTO
+#endif
+#else
+#define IPP_INITIALIZER(FEAT) // no-op without IPP; function-like so it matches the HAVE_IPP definitions
+#define IPP_INITIALIZER_AUTO
+#endif

 #define CV_IPP_CHECK_COND (cv::ipp::useIPP())
 #define CV_IPP_CHECK() if(CV_IPP_CHECK_COND)
--- a/modules/core/include/opencv2/core/utility.hpp
+++ b/modules/core/include/opencv2/core/utility.hpp
@ -520,6 +520,7 @@ protected:
    TLSDataContainer();
    virtual ~TLSDataContainer();

+    void  gatherData(std::vector<void*> &data) const;
 #if OPENCV_ABI_COMPATIBILITY > 300
    void* getData() const;
    void  release();
@ -546,9 +547,20 @@ public:
    inline ~TLSData()       { release();            } // Release key and delete associated data
    inline T* get() const   { return (T*)getData(); } // Get data assosiated with key

+    // Collects the data of all threads into the supplied vector
+    inline void gather(std::vector<T*> &data) const
+    {
+        // gatherData() takes untyped pointers, so hand it the same vector viewed as std::vector<void*>
+        gatherData(reinterpret_cast<std::vector<void*>&>(data));
+    }
+
 private:
    virtual void* createDataInstance() const {return new T;}                // Wrapper to allocate data by template
    virtual void  deleteDataInstance(void* pData) const {delete (T*)pData;} // Wrapper to release data by template
+
+    // Disable TLS copy operations: both are private, so code outside the class cannot copy a TLSData
+    TLSData(const TLSData &) {}
+    TLSData& operator =(const TLSData &) { return *this; }
 };

 /** @brief Designed for command line parsing
@ -597,7 +609,7 @@ For example:
    const String keys =
        "{help h usage ? |      | print this message   }"
        "{@image1        |      | image1 for compare   }"
-        "{@image2        |      | image2 for compare   }"
+        "{@image2        |<none>| image2 for compare   }"
        "{@repeat        |1     | number               }"
        "{path           |.     | path to file         }"
        "{fps            | -1.0 | fps for output video }"
@ -607,6 +619,13 @@ For example:
 }
@endcode

+Note that there are no default values for `help` and `timestamp` so we can check their presence using the `has()` method.
+Arguments with default values are considered to be always present. Use the `get()` method in these cases to check their
+actual value instead.
+
+String keys like `get<String>("@image1")` return the empty string `""` by default - even with an empty default value.
+Use the special `<none>` default value to enforce that the returned string must not be empty. (like in `get<String>("@image2")`)
+
 ### Usage

 For the described keys:
@ -618,7 +637,7 @@ For the described keys:
    # Bad call
    $ ./app -fps=aaa
    ERRORS:
-    Exception: can not convert: [aaa] to [double]
+    Parameter 'fps': can not convert: [aaa] to [double]
@endcode
 */
 class CV_EXPORTS CommandLineParser
--- a/modules/core/include/opencv2/core/va_intel.hpp
+++ b/modules/core/include/opencv2/core/va_intel.hpp
@ -5,36 +5,36 @@
 // Copyright (C) 2015, Itseez, Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.

-#ifndef __OPENCV_CORE_VAAPI_HPP__
-#define __OPENCV_CORE_VAAPI_HPP__
+#ifndef __OPENCV_CORE_VA_INTEL_HPP__
+#define __OPENCV_CORE_VA_INTEL_HPP__

 #ifndef __cplusplus
-#  error vaapi.hpp header must be compiled as C++
+#  error va_intel.hpp header must be compiled as C++
 #endif

 #include "opencv2/core.hpp"
 #include "ocl.hpp"

-#if defined(HAVE_VAAPI)
+#if defined(HAVE_VA)
 # include "va/va.h"
-#else  // HAVE_VAAPI
+#else  // HAVE_VA
 # if !defined(_VA_H_)
    typedef void* VADisplay;
    typedef unsigned int VASurfaceID;
 # endif // !_VA_H_
-#endif // HAVE_VAAPI
+#endif // HAVE_VA

-namespace cv { namespace vaapi {
+namespace cv { namespace va_intel {

-/** @addtogroup core_vaapi
-This section describes CL-VA (VA-API) interoperability.
+/** @addtogroup core_va_intel
+This section describes Intel VA-API/OpenCL (CL-VA) interoperability.

-To enable CL-VA interoperability support, configure OpenCV using CMake with WITH_VAAPI=ON . Currently VA-API is
+To enable CL-VA interoperability support, configure OpenCV using CMake with WITH_VA_INTEL=ON . Currently VA-API is
 supported on Linux only. You should also install Intel Media Server Studio (MSS) to use this feature. You may
-have to specify the path(s) to MSS components for cmake in environment variables: VAAPI_MSDK_ROOT for Media SDK
-(default is "/opt/intel/mediasdk"), and VAAPI_IOCL_ROOT for Intel OpenCL (default is "/opt/intel/opencl").
+have to specify the path(s) to MSS components for cmake in environment variables: VA_INTEL_MSDK_ROOT for Media SDK
+(default is "/opt/intel/mediasdk"), and VA_INTEL_IOCL_ROOT for Intel OpenCL (default is "/opt/intel/opencl").

-To use VA-API interoperability you should first create VADisplay (libva), and then call initializeContextFromVA()
+To use CL-VA interoperability you should first create VADisplay (libva), and then call initializeContextFromVA()
 function to create OpenCL context and set up interoperability.
 */
 //! @{
@ -46,29 +46,32 @@ using namespace cv::ocl;

 // TODO static functions in the Context class
 /** @brief Creates OpenCL context from VA.
-@param display - VADisplay for which CL interop should be established.
+@param display    - VADisplay for which CL interop should be established.
+@param tryInterop - if true, try to set up CL-VA interoperability; if false, set up to use slow copy instead.
@return Returns reference to OpenCL Context
 */
-CV_EXPORTS Context& initializeContextFromVA(VADisplay display);
+CV_EXPORTS Context& initializeContextFromVA(VADisplay display, bool tryInterop = true);

-} // namespace cv::vaapi::ocl
+} // namespace cv::va_intel::ocl

 /** @brief Converts InputArray to VASurfaceID object.
+@param display - VADisplay object.
@param src     - source InputArray.
@param surface - destination VASurfaceID object.
@param size    - size of image represented by VASurfaceID object.
 */
-CV_EXPORTS void convertToVASurface(InputArray src, VASurfaceID surface, Size size);
+CV_EXPORTS void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface, Size size);

 /** @brief Converts VASurfaceID object to OutputArray.
+@param display - VADisplay object.
@param surface - source VASurfaceID object.
@param size    - size of image represented by VASurfaceID object.
@param dst     - destination OutputArray.
 */
-CV_EXPORTS void convertFromVASurface(VASurfaceID surface, Size size, OutputArray dst);
+CV_EXPORTS void convertFromVASurface(VADisplay display, VASurfaceID surface, Size size, OutputArray dst);

 //! @}

-}} // namespace cv::vaapi
+}} // namespace cv::va_intel

-#endif /* __OPENCV_CORE_VAAPI_HPP__ */
+#endif /* __OPENCV_CORE_VA_INTEL_HPP__ */
--- a/modules/core/src/arithm.cpp
+++ b/modules/core/src/arithm.cpp
@ -1504,7 +1504,7 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
            k.args(src1arg, src2arg, maskarg, dstarg);
    }

-    size_t globalsize[] = { src1.cols * cn / kercn, (src1.rows + rowsPerWI - 1) / rowsPerWI };
+    size_t globalsize[] = { (size_t)src1.cols * cn / kercn, ((size_t)src1.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, 0, false);
 }

@ -1917,7 +1917,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
            k.args(src1arg, src2arg, maskarg, dstarg);
    }

-    size_t globalsize[] = { src1.cols * cn / kercn, (src1.rows + rowsPerWI - 1) / rowsPerWI };
+    size_t globalsize[] = { (size_t)src1.cols * cn / kercn, ((size_t)src1.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
 }

@ -4692,7 +4692,7 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
    CV_IPP_CHECK()
    {
        IppCmpOp op = convert_cmp(*(int *)_cmpop);
-        if( op  > 0 )
+        if( op >= 0 )
        {
            fixSteps(size, sizeof(dst[0]), step1, step2, step);
            if (0 <= ippiCompare_16s_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(size), op))
@ -4974,7 +4974,7 @@ static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, in
               ocl::KernelArg::WriteOnly(dst, cn, kercn));
    }

-    size_t globalsize[2] = { dst.cols * cn / kercn, (dst.rows + rowsPerWI - 1) / rowsPerWI };
+    size_t globalsize[2] = { (size_t)dst.cols * cn / kercn, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
 }

@ -5668,7 +5668,7 @@ static bool ocl_inRange( InputArray _src, InputArray _lowerb,
        ker.args(srcarg, dstarg, ocl::KernelArg::ReadOnlyNoSize(lscalaru),
               ocl::KernelArg::ReadOnlyNoSize(uscalaru), rowsPerWI);

-    size_t globalsize[2] = { ssize.width / colsPerWI, (ssize.height + rowsPerWI - 1) / rowsPerWI };
+    size_t globalsize[2] = { (size_t)ssize.width / colsPerWI, ((size_t)ssize.height + rowsPerWI - 1) / rowsPerWI };
    return ker.run(2, globalsize, NULL, false);
 }

--- a/modules/core/src/command_line_parser.cpp
+++ b/modules/core/src/command_line_parser.cpp
@ -4,6 +4,20 @@
 namespace cv
 {

+namespace {
+static const char* noneValue = "<none>";
+
+static String cat_string(const String& str) // returns str without its leading and trailing spaces
+{
+    int left = 0, right = (int)str.length();
+    while( left < right && str[left] == ' ' ) // strict bound: never index str[length], one past the last character
+        left++;
+    while( right > left && str[right-1] == ' ' )
+        right--;
+    return left >= right ? String("") : str.substr(left, right-left); // empty or all-space input yields ""
+}
+}
+
 struct CommandLineParserParams
 {
 public:
@ -27,7 +41,6 @@ struct CommandLineParser::Impl

    std::vector<String> split_range_string(const String& str, char fs, char ss) const;
    std::vector<String> split_string(const String& str, char symbol = ' ', bool create_empty_item = false) const;
-    String cat_string(const String& str) const;

    void apply_params(const String& key, const String& value);
    void apply_params(int i, String value);
@ -37,7 +50,7 @@ struct CommandLineParser::Impl
 };


-static String get_type_name(int type)
+static const char* get_type_name(int type)
 {
    if( type == Param::INT )
        return "int";
@ -78,14 +91,11 @@ static void from_str(const String& str, int type, void* dst)
    else if( type == Param::STRING )
        *(String*)dst = str;
    else
-        throw cv::Exception(CV_StsBadArg, "unknown/unsupported parameter type", "", __FILE__, __LINE__);
+        CV_Error(Error::StsBadArg, "unknown/unsupported parameter type");

    if (ss.fail())
    {
-        String err_msg = "can not convert: [" + str +
-        + "] to [" + get_type_name(type) + "]";
-
-        throw cv::Exception(CV_StsBadArg, err_msg, "", __FILE__, __LINE__);
+        CV_Error_(Error::StsBadArg, ("can not convert: [%s] to [%s]", str.c_str(), get_type_name(type)));
    }
 }

@ -97,24 +107,33 @@ void CommandLineParser::getByName(const String& name, bool space_delete, int typ
        {
            for (size_t j = 0; j < impl->data[i].keys.size(); j++)
            {
-                if (name.compare(impl->data[i].keys[j]) == 0)
+                if (name == impl->data[i].keys[j])
                {
                    String v = impl->data[i].def_value;
                    if (space_delete)
-                        v = impl->cat_string(v);
+                        v = cat_string(v);
+
+                    // the key was neither specified nor has it a default value
+                    if((v.empty() && type != Param::STRING) || v == noneValue) {
+                        impl->error = true;
+                        impl->error_message = impl->error_message + "Missing parameter: '" + name + "'\n";
+                        return;
+                    }
+
                    from_str(v, type, dst);
                    return;
                }
            }
        }
-        impl->error = true;
-        impl->error_message = impl->error_message + "Unknown parameter " + name + "\n";
    }
-    catch (std::exception& e)
+    catch (Exception& e)
    {
        impl->error = true;
-        impl->error_message = impl->error_message + "Exception: " + String(e.what()) + "\n";
+        impl->error_message = impl->error_message + "Parameter '"+ name + "': " + e.err + "\n";
+        return;
    }
+
+    CV_Error_(Error::StsBadArg, ("undeclared key '%s' requested", name.c_str()));
 }


@ -127,19 +146,27 @@ void CommandLineParser::getByIndex(int index, bool space_delete, int type, void*
            if (impl->data[i].number == index)
            {
                String v = impl->data[i].def_value;
-                if (space_delete == true) v = impl->cat_string(v);
+                if (space_delete == true) v = cat_string(v);
+
+                // the key was neither specified nor has it a default value
+                if((v.empty() && type != Param::STRING) || v == noneValue) {
+                    impl->error = true;
+                    impl->error_message = impl->error_message + format("Missing parameter #%d\n", index);
+                    return;
+                }
                from_str(v, type, dst);
                return;
            }
        }
-        impl->error = true;
-        impl->error_message = impl->error_message + "Unknown parameter #" + format("%d", index) + "\n";
    }
-    catch(std::exception & e)
+    catch(Exception& e)
    {
        impl->error = true;
-        impl->error_message = impl->error_message + "Exception: " + String(e.what()) + "\n";
+        impl->error_message = impl->error_message + format("Parameter #%d: ", index) + e.err + "\n";
+        return;
    }
+
+    CV_Error_(Error::StsBadArg, ("undeclared position %d requested", index));
 }

 static bool cmp_params(const CommandLineParserParams & p1, const CommandLineParserParams & p2)
@ -184,7 +211,7 @@ CommandLineParser::CommandLineParser(int argc, const char* const argv[], const S
        CommandLineParserParams p;
        p.keys = impl->split_string(l[0]);
        p.def_value = l[1];
-        p.help_message = impl->cat_string(l[2]);
+        p.help_message = cat_string(l[2]);
        p.number = -1;
        if (p.keys.size() <= 0)
        {
@ -207,25 +234,21 @@ CommandLineParser::CommandLineParser(int argc, const char* const argv[], const S
    jj = 0;
    for (int i = 1; i < argc; i++)
    {
-        String s = String(argv[i]);
+        String s(argv[i]);
+        bool hasSingleDash = s.length() > 1 && s[0] == '-';

-        if (s.find('=') != String::npos && s.find('=') < s.length())
+        if (hasSingleDash)
        {
-            std::vector<String> k_v = impl->split_string(s, '=', true);
-            for (int h = 0; h < 2; h++)
-            {
-                if (k_v[0][0] == '-')
-                    k_v[0] = k_v[0].substr(1, k_v[0].length() -1);
+            bool hasDoubleDash = s.length() > 2 && s[1] == '-';
+            String key = s.substr(hasDoubleDash ? 2 : 1);
+            String value = "true";
+            size_t equalsPos = key.find('=');
+
+            if(equalsPos != String::npos) {
+                value = key.substr(equalsPos + 1);
+                key = key.substr(0, equalsPos);
            }
-            impl->apply_params(k_v[0], k_v[1]);
-        }
-        else if (s.length() > 2 && s[0] == '-' && s[1] == '-')
-        {
-            impl->apply_params(s.substr(2), "true");
-        }
-        else if (s.length() > 1 && s[0] == '-')
-        {
-            impl->apply_params(s.substr(1), "true");
+            impl->apply_params(key, value);
        }
        else
        {
@ -303,16 +326,6 @@ void CommandLineParser::Impl::sort_params()
    std::sort (data.begin(), data.end(), cmp_params);
 }

-String CommandLineParser::Impl::cat_string(const String& str) const
-{
-    int left = 0, right = (int)str.length();
-    while( left <= right && str[left] == ' ' )
-        left++;
-    while( right > left && str[right-1] == ' ' )
-        right--;
-    return left >= right ? String("") : str.substr(left, right-left);
-}
-
 String CommandLineParser::getPathToApplication() const
 {
    return impl->path_to_app;
@ -324,12 +337,15 @@ bool CommandLineParser::has(const String& name) const
    {
        for (size_t j = 0; j < impl->data[i].keys.size(); j++)
        {
-            if (name.compare(impl->data[i].keys[j]) == 0 && String("true").compare(impl->data[i].def_value) == 0)
+            if (name == impl->data[i].keys[j])
            {
-                return true;
+                const String v = cat_string(impl->data[i].def_value);
+                return !v.empty() && v != noneValue;
            }
        }
    }
+
+    CV_Error_(Error::StsBadArg, ("undeclared key '%s' requested", name.c_str()));
    return false;
 }

@ -388,7 +404,7 @@ void CommandLineParser::printMessage() const
                    printf(", ");
                }
            }
-            String dv = impl->cat_string(impl->data[i].def_value);
+            String dv = cat_string(impl->data[i].def_value);
            if (dv.compare("") != 0)
            {
                printf(" (value:%s)", dv.c_str());
@ -408,7 +424,7 @@ void CommandLineParser::printMessage() const

            printf("%s", k.c_str());

-            String dv = impl->cat_string(impl->data[i].def_value);
+            String dv = cat_string(impl->data[i].def_value);
            if (dv.compare("") != 0)
            {
                printf(" (value:%s)", dv.c_str());
--- a/modules/core/src/convert.cpp
+++ b/modules/core/src/convert.cpp
@ -902,7 +902,7 @@ static bool ocl_split( InputArray _m, OutputArrayOfArrays _mv )
        argidx = k.set(argidx, ocl::KernelArg::WriteOnlyNoSize(dst[i]));
    k.set(argidx, rowsPerWI);

-    size_t globalsize[2] = { size.width, (size.height + rowsPerWI - 1) / rowsPerWI };
+    size_t globalsize[2] = { (size_t)size.width, ((size_t)size.height + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
 }

@ -1069,7 +1069,7 @@ static bool ocl_merge( InputArrayOfArrays _mv, OutputArray _dst )
    argidx = k.set(argidx, ocl::KernelArg::WriteOnly(dst));
    k.set(argidx, rowsPerWI);

-    size_t globalsize[2] = { dst.cols, (dst.rows + rowsPerWI - 1) / rowsPerWI };
+    size_t globalsize[2] = { (size_t)dst.cols, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
 }

@ -1338,7 +1338,7 @@ static bool ocl_mixChannels(InputArrayOfArrays _src, InputOutputArrayOfArrays _d
    argindex = k.set(argindex, size.width);
    k.set(argindex, rowsPerWI);

-    size_t globalsize[2] = { size.width, (size.height + rowsPerWI - 1) / rowsPerWI };
+    size_t globalsize[2] = { (size_t)size.width, ((size_t)size.height + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
 }

@ -5505,7 +5505,7 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha
    else if (wdepth == CV_64F)
        k.args(srcarg, dstarg, alpha, beta);

-    size_t globalsize[2] = { src.cols * cn / kercn, (src.rows + rowsPerWI - 1) / rowsPerWI };
+    size_t globalsize[2] = { (size_t)src.cols * cn / kercn, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
 }

@ -5673,7 +5673,7 @@ static bool ocl_LUT(InputArray _src, InputArray _lut, OutputArray _dst)
    k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::ReadOnlyNoSize(lut),
        ocl::KernelArg::WriteOnly(dst, dcn, kercn));

-    size_t globalSize[2] = { dst.cols * dcn / kercn, (dst.rows + 3) / 4 };
+    size_t globalSize[2] = { (size_t)dst.cols * dcn / kercn, ((size_t)dst.rows + 3) / 4 };
    return k.run(2, globalSize, NULL, false);
 }

@ -5682,7 +5682,7 @@ static bool ocl_LUT(InputArray _src, InputArray _lut, OutputArray _dst)
 #if defined(HAVE_IPP)
 namespace ipp {

-#if 0 // there are no performance benefits (PR #2653)
+#if IPP_DISABLE_BLOCK // there are no performance benefits (PR #2653)
 class IppLUTParallelBody_LUTC1 : public ParallelLoopBody
 {
 public:
@ -5850,7 +5850,7 @@ static bool ipp_lut(Mat &src, Mat &lut, Mat &dst)
    Ptr<ParallelLoopBody> body;

    size_t elemSize1 = CV_ELEM_SIZE1(dst.depth());
-#if 0 // there are no performance benefits (PR #2653)
+#if IPP_DISABLE_BLOCK // there are no performance benefits (PR #2653)
    if (lutcn == 1)
    {
        ParallelLoopBody* p = new ipp::IppLUTParallelBody_LUTC1(src, lut, dst, &ok);
@ -6053,7 +6053,7 @@ static bool ocl_normalize( InputArray _src, InputOutputArray _dst, InputArray _m
                k.args(srcarg, maskarg, dstarg);
        }

-        size_t globalsize[2] = { src.cols, (src.rows + rowsPerWI - 1) / rowsPerWI };
+        size_t globalsize[2] = { (size_t)src.cols, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI };
        return k.run(2, globalsize, NULL, false);
    }
    else
--- a/modules/core/src/copy.cpp
+++ b/modules/core/src/copy.cpp
@ -377,7 +377,7 @@ Mat& Mat::operator = (const Scalar& s)

    if( is[0] == 0 && is[1] == 0 && is[2] == 0 && is[3] == 0 )
    {
-#if defined HAVE_IPP && !defined HAVE_IPP_ICV_ONLY && 0
+#if defined HAVE_IPP && !defined HAVE_IPP_ICV_ONLY && IPP_DISABLE_BLOCK
        CV_IPP_CHECK()
        {
            if (dims <= 2 || isContinuous())
@ -692,7 +692,7 @@ static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
    size_t maxWorkGroupSize = dev.maxWorkGroupSize();
    CV_Assert(maxWorkGroupSize % 4 == 0);

-    size_t globalsize[2] = { cols, (rows + pxPerWIy - 1) / pxPerWIy },
+    size_t globalsize[2] = { (size_t)cols, ((size_t)rows + pxPerWIy - 1) / pxPerWIy },
            localsize[2] = { maxWorkGroupSize / 4, 4 };
    return k.run(2, globalsize, (flipType == FLIP_COLS) && !dev.isIntel() ? localsize : NULL, false);
 }
@ -833,7 +833,7 @@ static bool ocl_repeat(InputArray _src, int ny, int nx, OutputArray _dst)
    UMat src = _src.getUMat(), dst = _dst.getUMat();
    k.args(ocl::KernelArg::ReadOnly(src, cn, kercn), ocl::KernelArg::WriteOnlyNoSize(dst));

-    size_t globalsize[] = { src.cols * cn / kercn, (src.rows + rowsPerWI - 1) / rowsPerWI };
+    size_t globalsize[] = { (size_t)src.cols * cn / kercn, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
 }

@ -1110,7 +1110,7 @@ static bool ocl_copyMakeBorder( InputArray _src, OutputArray _dst, int top, int
    k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst),
           top, left, ocl::KernelArg::Constant(Mat(1, 1, sctype, value)));

-    size_t globalsize[2] = { dst.cols, (dst.rows + rowsPerWI - 1) / rowsPerWI };
+    size_t globalsize[2] = { (size_t)dst.cols, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
 }

@ -1157,7 +1157,7 @@ void cv::copyMakeBorder( InputArray _src, OutputArray _dst, int top, int bottom,

    borderType &= ~BORDER_ISOLATED;

-#if defined HAVE_IPP && 0
+#if defined HAVE_IPP && IPP_DISABLE_BLOCK
    CV_IPP_CHECK()
    {
        typedef IppStatus (CV_STDCALL * ippiCopyMakeBorder)(const void * pSrc, int srcStep, IppiSize srcRoiSize, void * pDst,
--- a/modules/core/src/datastructs.cpp
+++ b/modules/core/src/datastructs.cpp
@ -352,6 +352,7 @@ CV_IMPL CvString
 cvMemStorageAllocString( CvMemStorage* storage, const char* ptr, int len )
 {
    CvString str;
+    memset(&str, 0, sizeof(CvString));

    str.len = len >= 0 ? len : (int)strlen(ptr);
    str.ptr = (char*)cvMemStorageAlloc( storage, str.len + 1 );
@ -1694,6 +1695,9 @@ cvSeqRemoveSlice( CvSeq* seq, CvSlice slice )

    slice.end_index = slice.start_index + length;

+    if ( slice.start_index == slice.end_index )
+        return;
+
    if( slice.end_index < total )
    {
        CvSeqReader reader_to, reader_from;
--- a/modules/core/src/directx.cpp
+++ b/modules/core/src/directx.cpp
@ -729,7 +729,7 @@ bool ocl_convert_nv12_to_bgr(

    k.args(clImageY, clImageUV, clBuffer, step, cols, rows);

-    size_t globalsize[] = { cols, rows };
+    size_t globalsize[] = { (size_t)cols, (size_t)rows };
    return k.run(2, globalsize, 0, false);
 }

@ -750,7 +750,7 @@ bool ocl_convert_bgr_to_nv12(

    k.args(clBuffer, step, cols, rows, clImageY, clImageUV);

-    size_t globalsize[] = { cols, rows };
+    size_t globalsize[] = { (size_t)cols, (size_t)rows };
    return k.run(2, globalsize, 0, false);
 }

@ -834,7 +834,7 @@ void convertToD3D11Texture2D(InputArray src, ID3D11Texture2D* pD3D11Texture2D)
    {
        size_t offset = 0; // TODO
        size_t origin[3] = { 0, 0, 0 };
-        size_t region[3] = { u.cols, u.rows, 1 };
+        size_t region[3] = { (size_t)u.cols, (size_t)u.rows, 1 };

        status = clEnqueueCopyBufferToImage(q, clBuffer, clImage, offset, origin, region, 0, NULL, NULL);
        if (status != CL_SUCCESS)
@ -939,7 +939,7 @@ void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst
    {
        size_t offset = 0; // TODO
        size_t origin[3] = { 0, 0, 0 };
-        size_t region[3] = { u.cols, u.rows, 1 };
+        size_t region[3] = { (size_t)u.cols, (size_t)u.rows, 1 };

        status = clEnqueueCopyImageToBuffer(q, clImage, clBuffer, origin, region, offset, 0, NULL, NULL);
        if (status != CL_SUCCESS)
@ -1041,7 +1041,7 @@ void convertToD3D10Texture2D(InputArray src, ID3D10Texture2D* pD3D10Texture2D)
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireD3D10ObjectsKHR failed");
    size_t offset = 0; // TODO
    size_t dst_origin[3] = {0, 0, 0};
-    size_t region[3] = {u.cols, u.rows, 1};
+    size_t region[3] = {(size_t)u.cols, (size_t)u.rows, 1};
    status = clEnqueueCopyBufferToImage(q, clBuffer, clImage, offset, dst_origin, region, 0, NULL, NULL);
    if (status != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueCopyBufferToImage failed");
@ -1100,7 +1100,7 @@ void convertFromD3D10Texture2D(ID3D10Texture2D* pD3D10Texture2D, OutputArray dst
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireD3D10ObjectsKHR failed");
    size_t offset = 0; // TODO
    size_t src_origin[3] = {0, 0, 0};
-    size_t region[3] = {u.cols, u.rows, 1};
+    size_t region[3] = {(size_t)u.cols, (size_t)u.rows, 1};
    status = clEnqueueCopyImageToBuffer(q, clImage, clBuffer, src_origin, region, offset, 0, NULL, NULL);
    if (status != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueCopyImageToBuffer failed");
@ -1195,7 +1195,7 @@ void convertToDirect3DSurface9(InputArray src, IDirect3DSurface9* pDirect3DSurfa
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireDX9MediaSurfacesKHR failed");
    size_t offset = 0; // TODO
    size_t dst_origin[3] = {0, 0, 0};
-    size_t region[3] = {u.cols, u.rows, 1};
+    size_t region[3] = {(size_t)u.cols, (size_t)u.rows, 1};
    status = clEnqueueCopyBufferToImage(q, clBuffer, clImage, offset, dst_origin, region, 0, NULL, NULL);
    if (status != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueCopyBufferToImage failed");
@ -1261,7 +1261,7 @@ void convertFromDirect3DSurface9(IDirect3DSurface9* pDirect3DSurface9, OutputArr
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireDX9MediaSurfacesKHR failed");
    size_t offset = 0; // TODO
    size_t src_origin[3] = {0, 0, 0};
-    size_t region[3] = {u.cols, u.rows, 1};
+    size_t region[3] = {(size_t)u.cols, (size_t)u.rows, 1};
    status = clEnqueueCopyImageToBuffer(q, clImage, clBuffer, src_origin, region, offset, 0, NULL, NULL);
    if (status != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueCopyImageToBuffer failed");
--- a/modules/core/src/dxt.cpp
+++ b/modules/core/src/dxt.cpp
@ -54,7 +54,7 @@ namespace cv
 # pragma warning(disable: 4748)
 #endif

-#if IPP_VERSION_X100 >= 701
+#if IPP_VERSION_X100 >= 710
 #define USE_IPP_DFT 1
 #else
 #undef USE_IPP_DFT
@ -2934,7 +2934,7 @@ static bool ocl_mulSpectrums( InputArray _srcA, InputArray _srcB,
    k.args(ocl::KernelArg::ReadOnlyNoSize(A), ocl::KernelArg::ReadOnlyNoSize(B),
           ocl::KernelArg::WriteOnly(dst), rowsPerWI);

-    size_t globalsize[2] = { asize.width, (asize.height + rowsPerWI - 1) / rowsPerWI };
+    size_t globalsize[2] = { (size_t)asize.width, ((size_t)asize.height + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
 }

@ -3318,28 +3318,102 @@ static void IDCT_64f(const double* src, int src_step, double* dft_src, double* d

 }

+#ifdef HAVE_IPP
 namespace cv
 {
-#if defined HAVE_IPP && IPP_VERSION_MAJOR >= 7

+#if IPP_VERSION_X100 >= 900
+typedef IppStatus (CV_STDCALL * ippiDCTFunc)(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, const void* pDCTSpec, Ipp8u* pBuffer);
+typedef IppStatus (CV_STDCALL * ippiDCTInit)(void* pDCTSpec, IppiSize roiSize, Ipp8u* pMemInit );
+typedef IppStatus (CV_STDCALL * ippiDCTGetSize)(IppiSize roiSize, int* pSizeSpec, int* pSizeInit, int* pSizeBuf);
+#elif IPP_VERSION_X100 >= 700
 typedef IppStatus (CV_STDCALL * ippiDCTFunc)(const Ipp32f*, int, Ipp32f*, int, const void*, Ipp8u*);
 typedef IppStatus (CV_STDCALL * ippiDCTInitAlloc)(void**, IppiSize, IppHintAlgorithm);
 typedef IppStatus (CV_STDCALL * ippiDCTFree)(void* pDCTSpec);
 typedef IppStatus (CV_STDCALL * ippiDCTGetBufSize)(const void*, int*);
+#endif

-template <typename Dct>
 class DctIPPLoop_Invoker : public ParallelLoopBody
 {
 public:
-
-    DctIPPLoop_Invoker(const Mat& _src, Mat& _dst, const Dct* _ippidct, bool _inv, bool *_ok) :
-        ParallelLoopBody(), src(&_src), dst(&_dst), ippidct(_ippidct), inv(_inv), ok(_ok)
+    DctIPPLoop_Invoker(const Mat& _src, Mat& _dst, bool _inv, bool *_ok) :
+        ParallelLoopBody(), src(&_src), dst(&_dst), inv(_inv), ok(_ok)
    {
        *ok = true;
    }

    virtual void operator()(const Range& range) const
    {
+        if(*ok == false)
+            return;
+
+#if IPP_VERSION_X100 >= 900
+        IppiSize srcRoiSize = {src->cols, 1};
+
+        int specSize    = 0;
+        int initSize    = 0;
+        int bufferSize  = 0;
+
+        Ipp8u* pDCTSpec = NULL;
+        Ipp8u* pBuffer  = NULL;
+        Ipp8u* pInitBuf = NULL;
+
+        #define IPP_RETURN              \
+            if(pDCTSpec)                \
+                ippFree(pDCTSpec);      \
+            if(pBuffer)                 \
+                ippFree(pBuffer);       \
+            if(pInitBuf)                \
+                ippFree(pInitBuf);      \
+            return;
+
+        ippiDCTFunc     ippDctFun   = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R         : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
+        ippiDCTInit     ippDctInit     = inv ? (ippiDCTInit)ippiDCTInvInit_32f         : (ippiDCTInit)ippiDCTFwdInit_32f;
+        ippiDCTGetSize  ippDctGetSize  = inv ? (ippiDCTGetSize)ippiDCTInvGetSize_32f   : (ippiDCTGetSize)ippiDCTFwdGetSize_32f;
+
+        if(ippDctGetSize(srcRoiSize, &specSize, &initSize, &bufferSize) < 0)
+        {
+            *ok = false;
+            return;
+        }
+
+        pDCTSpec = (Ipp8u*)ippMalloc(specSize);
+        if(!pDCTSpec && specSize)
+        {
+            *ok = false;
+            return;
+        }
+
+        pBuffer  = (Ipp8u*)ippMalloc(bufferSize);
+        if(!pBuffer && bufferSize)
+        {
+            *ok = false;
+            IPP_RETURN
+        }
+        pInitBuf = (Ipp8u*)ippMalloc(initSize);
+        if(!pInitBuf && initSize)
+        {
+            *ok = false;
+            IPP_RETURN
+        }
+
+        if(ippDctInit(pDCTSpec, srcRoiSize, pInitBuf) < 0)
+        {
+            *ok = false;
+            IPP_RETURN
+        }
+
+        for(int i = range.start; i < range.end; ++i)
+        {
+            if(ippDctFun(src->ptr<float>(i), (int)src->step,dst->ptr<float>(i), (int)dst->step, pDCTSpec, pBuffer) < 0)
+            {
+                *ok = false;
+                IPP_RETURN
+            }
+        }
+        IPP_RETURN
+#undef IPP_RETURN
+#elif IPP_VERSION_X100 >= 700
        void* pDCTSpec;
        AutoBuffer<uchar> buf;
        uchar* pBuffer = 0;
@ -3349,6 +3423,7 @@ public:

        CV_SUPPRESS_DEPRECATED_START

+        ippiDCTFunc ippDctFun           = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R             : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
        ippiDCTInitAlloc ippInitAlloc   = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f   : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
        ippiDCTFree ippFree             = inv ? (ippiDCTFree)ippiDCTInvFree_32f             : (ippiDCTFree)ippiDCTFwdFree_32f;
        ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
@ -3359,8 +3434,13 @@ public:
            pBuffer = (uchar*)buf;

            for( int i = range.start; i < range.end; ++i)
-                if(!(*ippidct)(src->ptr<float>(i), (int)src->step,dst->ptr<float>(i), (int)dst->step, pDCTSpec, (Ipp8u*)pBuffer))
+            {
+                if(ippDctFun(src->ptr<float>(i), (int)src->step,dst->ptr<float>(i), (int)dst->step, pDCTSpec, (Ipp8u*)pBuffer) < 0)
+                {
                    *ok = false;
+                    break;
+                }
+            }
        }
        else
            *ok = false;
@ -3369,44 +3449,91 @@ public:
            ippFree(pDCTSpec);

        CV_SUPPRESS_DEPRECATED_END
+#else
+        CV_UNUSED(range);
+        *ok = false;
+#endif
    }

 private:
    const Mat* src;
    Mat* dst;
-    const Dct* ippidct;
    bool inv;
    bool *ok;
 };

-template <typename Dct>
-bool DctIPPLoop(const Mat& src, Mat& dst, const Dct& ippidct, bool inv)
+static bool DctIPPLoop(const Mat& src, Mat& dst, bool inv)
 {
    bool ok;
-    parallel_for_(Range(0, src.rows), DctIPPLoop_Invoker<Dct>(src, dst, &ippidct, inv, &ok), src.rows/(double)(1<<4) );
+    parallel_for_(Range(0, src.rows), DctIPPLoop_Invoker(src, dst, inv, &ok), src.rows/(double)(1<<4) );
    return ok;
 }

-struct IPPDCTFunctor
-{
-    IPPDCTFunctor(ippiDCTFunc _func) : func(_func){}
-
-    bool operator()(const Ipp32f* src, int srcStep, Ipp32f* dst, int dstStep, const void* pDCTSpec, Ipp8u* pBuffer) const
-    {
-        return func ? func(src, srcStep, dst, dstStep, pDCTSpec, pBuffer) >= 0 : false;
-    }
-private:
-    ippiDCTFunc func;
-};
-
 static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row)
 {
-    ippiDCTFunc ippFunc = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R ;
-
-    if (row)
-        return(DctIPPLoop(src,dst,IPPDCTFunctor(ippFunc),inv));
+    if(row)
+        return DctIPPLoop(src, dst, inv);
    else
    {
+#if IPP_VERSION_X100 >= 900
+        IppiSize srcRoiSize = {src.cols, src.rows};
+
+        int specSize    = 0;
+        int initSize    = 0;
+        int bufferSize  = 0;
+
+        Ipp8u* pDCTSpec = NULL;
+        Ipp8u* pBuffer  = NULL;
+        Ipp8u* pInitBuf = NULL;
+
+        #define IPP_RELEASE             \
+            if(pDCTSpec)                \
+                ippFree(pDCTSpec);      \
+            if(pBuffer)                 \
+                ippFree(pBuffer);       \
+            if(pInitBuf)                \
+                ippFree(pInitBuf);      \
+
+        ippiDCTFunc     ippDctFun      = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R         : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
+        ippiDCTInit     ippDctInit     = inv ? (ippiDCTInit)ippiDCTInvInit_32f         : (ippiDCTInit)ippiDCTFwdInit_32f;
+        ippiDCTGetSize  ippDctGetSize  = inv ? (ippiDCTGetSize)ippiDCTInvGetSize_32f   : (ippiDCTGetSize)ippiDCTFwdGetSize_32f;
+
+        if(ippDctGetSize(srcRoiSize, &specSize, &initSize, &bufferSize) < 0)
+            return false;
+
+        pDCTSpec = (Ipp8u*)ippMalloc(specSize);
+        if(!pDCTSpec && specSize)
+            return false;
+
+        pBuffer  = (Ipp8u*)ippMalloc(bufferSize);
+        if(!pBuffer && bufferSize)
+        {
+            IPP_RELEASE
+            return false;
+        }
+        pInitBuf = (Ipp8u*)ippMalloc(initSize);
+        if(!pInitBuf && initSize)
+        {
+            IPP_RELEASE
+            return false;
+        }
+
+        if(ippDctInit(pDCTSpec, srcRoiSize, pInitBuf) < 0)
+        {
+            IPP_RELEASE
+            return false;
+        }
+
+        if(ippDctFun(src.ptr<float>(), (int)src.step,dst.ptr<float>(), (int)dst.step, pDCTSpec, pBuffer) < 0)
+        {
+            IPP_RELEASE
+            return false;
+        }
+
+        IPP_RELEASE
+        return true;
+#undef IPP_RELEASE
+#elif IPP_VERSION_X100 >= 700
        IppStatus status;
        void* pDCTSpec;
        AutoBuffer<uchar> buf;
@ -3417,6 +3544,7 @@ static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row)

        CV_SUPPRESS_DEPRECATED_START

+        ippiDCTFunc ippDctFun           = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R             : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
        ippiDCTInitAlloc ippInitAlloc   = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f   : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
        ippiDCTFree ippFree             = inv ? (ippiDCTFree)ippiDCTInvFree_32f             : (ippiDCTFree)ippiDCTFwdFree_32f;
        ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
@ -3428,7 +3556,7 @@ static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row)
            buf.allocate( bufSize );
            pBuffer = (uchar*)buf;

-            status = ippFunc(src.ptr<float>(), (int)src.step, dst.ptr<float>(), (int)dst.step, pDCTSpec, (Ipp8u*)pBuffer);
+            status = ippDctFun(src.ptr<float>(), (int)src.step, dst.ptr<float>(), (int)dst.step, pDCTSpec, (Ipp8u*)pBuffer);
        }

        if (pDCTSpec)
@ -3437,11 +3565,14 @@ static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row)
        CV_SUPPRESS_DEPRECATED_END

        return status >= 0;
+#else
+        CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(inv); CV_UNUSED(row);
+        return false;
+#endif
    }
 }
-
-#endif
 }
+#endif

 void cv::dct( InputArray _src0, OutputArray _dst, int flags )
 {
@ -3473,21 +3604,7 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags )
    _dst.create( src.rows, src.cols, type );
    Mat dst = _dst.getMat();

-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
-    CV_IPP_CHECK()
-    {
-        bool row = (flags & DCT_ROWS) != 0;
-        if (src.type() == CV_32F)
-        {
-            if(ippi_DCT_32f(src,dst,inv, row))
-            {
-                CV_IMPL_ADD(CV_IMPL_IPP);
-                return;
-            }
-            setIppErrorStatus();
-        }
-    }
-#endif
+    CV_IPP_RUN(IPP_VERSION_X100 >= 700 && src.type() == CV_32F, ippi_DCT_32f(src, dst, inv, ((flags & DCT_ROWS) != 0)))

    DCTFunc dct_func = dct_tbl[(int)inv + (depth == CV_64F)*2];

--- a/modules/core/src/lda.cpp
+++ b/modules/core/src/lda.cpp
@ -937,9 +937,9 @@ public:
 // Linear Discriminant Analysis implementation
 //------------------------------------------------------------------------------

-LDA::LDA(int num_components) : _num_components(num_components) { }
+LDA::LDA(int num_components) : _dataAsRow(true), _num_components(num_components) { }

-LDA::LDA(InputArrayOfArrays src, InputArray labels, int num_components) : _num_components(num_components)
+LDA::LDA(InputArrayOfArrays src, InputArray labels, int num_components) : _dataAsRow(true),  _num_components(num_components)
 {
    this->compute(src, labels); //! compute eigenvectors and eigenvalues
 }
@ -1106,14 +1106,14 @@ void LDA::compute(InputArrayOfArrays _src, InputArray _lbls) {
    }
 }

-// Projects samples into the LDA subspace.
+// Projects one or more row aligned samples into the LDA subspace.
 Mat LDA::project(InputArray src) {
-   return subspaceProject(_eigenvectors, Mat(), _dataAsRow ? src : src.getMat().t());
+   return subspaceProject(_eigenvectors, Mat(), src);
 }

-// Reconstructs projections from the LDA subspace.
+// Reconstructs projections from the LDA subspace from one or more row aligned samples.
 Mat LDA::reconstruct(InputArray src) {
-   return subspaceReconstruct(_eigenvectors, Mat(), _dataAsRow ? src : src.getMat().t());
+   return subspaceReconstruct(_eigenvectors, Mat(), src);
 }

 }
--- a/modules/core/src/mathfuncs.cpp
+++ b/modules/core/src/mathfuncs.cpp
@ -93,7 +93,7 @@ static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, in
    else
        k.args(src1arg, src2arg, dstarg);

-    size_t globalsize[] = { src1.cols * cn / kercn, (src1.rows + rowsPerWI - 1) / rowsPerWI };
+    size_t globalsize[] = { (size_t)src1.cols * cn / kercn, ((size_t)src1.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, 0, false);
 }

@ -330,7 +330,7 @@ static bool ocl_cartToPolar( InputArray _src1, InputArray _src2,
           ocl::KernelArg::WriteOnly(dst1, cn),
           ocl::KernelArg::WriteOnlyNoSize(dst2));

-    size_t globalsize[2] = { dst1.cols * cn, (dst1.rows + rowsPerWI - 1) / rowsPerWI };
+    size_t globalsize[2] = { (size_t)dst1.cols * cn, ((size_t)dst1.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
 }

@ -612,7 +612,7 @@ static bool ocl_polarToCart( InputArray _mag, InputArray _angle,
    k.args(ocl::KernelArg::ReadOnlyNoSize(mag), ocl::KernelArg::ReadOnlyNoSize(angle),
           ocl::KernelArg::WriteOnly(dst1, cn), ocl::KernelArg::WriteOnlyNoSize(dst2));

-    size_t globalsize[2] = { dst1.cols * cn, (dst1.rows + rowsPerWI - 1) / rowsPerWI };
+    size_t globalsize[2] = { (size_t)dst1.cols * cn, ((size_t)dst1.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
 }

@ -1181,8 +1181,8 @@ iPow_i( const T* src, T* dst, int len, int power )
    {
        T tab[5] =
        {
-            power == -1 ? saturate_cast<T>(-1) : 0, (power & 1) ? -1 : 1,
-            std::numeric_limits<T>::max(), 1, power == -1 ? 1 : 0
+            saturate_cast<T>(power == -1 ? -1 : 0), saturate_cast<T>((power & 1) ? -1 : 1),
+            std::numeric_limits<T>::max(), 1, saturate_cast<T>(power == -1 ? 1 : 0)
        };
        for( int i = 0; i < len; i++ )
        {
@ -1349,7 +1349,7 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst,
            k.args(srcarg, dstarg, power);
    }

-    size_t globalsize[2] = { dst.cols *  cn, (dst.rows + rowsPerWI - 1) / rowsPerWI };
+    size_t globalsize[2] = { (size_t)dst.cols *  cn, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
 }

@ -1734,7 +1734,7 @@ static bool ocl_patchNaNs( InputOutputArray _a, float value )
    k.args(ocl::KernelArg::ReadOnlyNoSize(a),
           ocl::KernelArg::WriteOnly(a, cn), (float)value);

-    size_t globalsize[2] = { a.cols * cn, (a.rows + rowsPerWI - 1) / rowsPerWI };
+    size_t globalsize[2] = { (size_t)a.cols * cn, ((size_t)a.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
 }

--- a/modules/core/src/matmul.cpp
+++ b/modules/core/src/matmul.cpp
@ -859,8 +859,8 @@ static bool ocl_gemm( InputArray matA, InputArray matB, double alpha,
               ocl::KernelArg::ReadWrite(D, cn, kercn),
               sizeA.width, (float)alpha, (float)beta);

-    size_t globalsize[2] = { sizeD.width * cn / kercn, sizeD.height};
-    size_t localsize[2] = { block_size, block_size};
+    size_t globalsize[2] = { (size_t)sizeD.width * cn / kercn, (size_t)sizeD.height};
+    size_t localsize[2] = { (size_t)block_size, (size_t)block_size};
    return k.run(2, globalsize, block_size!=1 ? localsize : NULL, false);
 }
 #endif
@ -2304,7 +2304,7 @@ static bool ocl_scaleAdd( InputArray _src1, double alpha, InputArray _src2, Outp
    else
        k.args(src1arg, src2arg, dstarg, alpha);

-    size_t globalsize[2] = { dst.cols * cn / kercn, (dst.rows + rowsPerWI - 1) / rowsPerWI };
+    size_t globalsize[2] = { (size_t)dst.cols * cn / kercn, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
 }

@ -2916,7 +2916,7 @@ dotProd_(const T* src1, const T* src2, int len)
 static double dotProd_8u(const uchar* src1, const uchar* src2, int len)
 {
    double r = 0;
-#if ARITHM_USE_IPP && 0
+#if ARITHM_USE_IPP && IPP_DISABLE_BLOCK
    CV_IPP_CHECK()
    {
        if (0 <= ippiDotProd_8u64f_C1R(src1, (int)(len*sizeof(src1[0])),
@ -3131,7 +3131,7 @@ static double dotProd_16u(const ushort* src1, const ushort* src2, int len)

 static double dotProd_16s(const short* src1, const short* src2, int len)
 {
-#if (ARITHM_USE_IPP == 1)
+#if (ARITHM_USE_IPP == 1) && (IPP_VERSION_X100 != 900) // bug in IPP 9.0.0
    CV_IPP_CHECK()
    {
        double r = 0;
--- a/modules/core/src/matop.cpp
+++ b/modules/core/src/matop.cpp
@ -1583,12 +1583,12 @@ void MatOp_Initializer::multiply(const MatExpr& e, double s, MatExpr& res) const

 inline void MatOp_Initializer::makeExpr(MatExpr& res, int method, Size sz, int type, double alpha)
 {
-    res = MatExpr(getGlobalMatOpInitializer(), method, Mat(sz, type, (void*)0xEEEEEEEE), Mat(), Mat(), alpha, 0);
+    res = MatExpr(getGlobalMatOpInitializer(), method, Mat(sz, type, (void*)(size_t)0xEEEEEEEE), Mat(), Mat(), alpha, 0);
 }

 inline void MatOp_Initializer::makeExpr(MatExpr& res, int method, int ndims, const int* sizes, int type, double alpha)
 {
-    res = MatExpr(getGlobalMatOpInitializer(), method, Mat(ndims, sizes, type, (void*)0xEEEEEEEE), Mat(), Mat(), alpha, 0);
+    res = MatExpr(getGlobalMatOpInitializer(), method, Mat(ndims, sizes, type, (void*)(size_t)0xEEEEEEEE), Mat(), Mat(), alpha, 0);
 }

 ///////////////////////////////////////////////////////////////////////////////////////////////////////////
--- a/modules/core/src/matrix.cpp
+++ b/modules/core/src/matrix.cpp
@ -208,17 +208,14 @@ public:
        if(!u)
            return;

-        CV_Assert(u->urefcount >= 0);
-        CV_Assert(u->refcount >= 0);
-        if(u->refcount == 0)
+        CV_Assert(u->urefcount == 0);
+        CV_Assert(u->refcount == 0);
+        if( !(u->flags & UMatData::USER_ALLOCATED) )
        {
-            if( !(u->flags & UMatData::USER_ALLOCATED) )
-            {
-                fastFree(u->origdata);
-                u->origdata = 0;
-            }
-            delete u;
+            fastFree(u->origdata);
+            u->origdata = 0;
        }
+        delete u;
    }
 };

@ -2834,7 +2831,7 @@ static bool ocl_setIdentity( InputOutputArray _m, const Scalar& s )
    k.args(ocl::KernelArg::WriteOnly(m, cn, kercn),
           ocl::KernelArg::Constant(Mat(1, 1, sctype, s)));

-    size_t globalsize[2] = { m.cols * cn / kercn, (m.rows + rowsPerWI - 1) / rowsPerWI };
+    size_t globalsize[2] = { (size_t)m.cols * cn / kercn, ((size_t)m.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
 }

@ -3074,7 +3071,7 @@ static bool ocl_transpose( InputArray _src, OutputArray _dst )
               ocl::KernelArg::WriteOnlyNoSize(dst));

    size_t localsize[2]  = { TILE_DIM, BLOCK_ROWS };
-    size_t globalsize[2] = { src.cols, inplace ? (src.rows + rowsPerWI - 1) / rowsPerWI : (divUp(src.rows, TILE_DIM) * BLOCK_ROWS) };
+    size_t globalsize[2] = { (size_t)src.cols, inplace ? ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI : (divUp((size_t)src.rows, TILE_DIM) * BLOCK_ROWS) };

    if (inplace && dev.isIntel())
    {
@ -3370,22 +3367,20 @@ typedef void (*ReduceFunc)( const Mat& src, Mat& dst );
 #define reduceMinR32f reduceR_<float, float, OpMin<float> >
 #define reduceMinR64f reduceR_<double,double,OpMin<double> >

-#if IPP_VERSION_X100 > 0
-
-static inline void reduceSumC_8u16u16s32f_64f(const cv::Mat& srcmat, cv::Mat& dstmat)
+#ifdef HAVE_IPP
+static inline bool ipp_reduceSumC_8u16u16s32f_64f(const cv::Mat& srcmat, cv::Mat& dstmat)
 {
-    cv::Size size = srcmat.size();
-    IppiSize roisize = { size.width, 1 };
    int sstep = (int)srcmat.step, stype = srcmat.type(),
-            sdepth = CV_MAT_DEPTH(stype), ddepth = dstmat.depth();
+            ddepth = dstmat.depth();
+
+    IppiSize roisize = { srcmat.size().width, 1 };

    typedef IppStatus (CV_STDCALL * ippiSum)(const void * pSrc, int srcStep, IppiSize roiSize, Ipp64f* pSum);
    typedef IppStatus (CV_STDCALL * ippiSumHint)(const void * pSrc, int srcStep, IppiSize roiSize, Ipp64f* pSum, IppHintAlgorithm hint);
    ippiSum ippFunc = 0;
    ippiSumHint ippFuncHint = 0;
-    cv::ReduceFunc func = 0;

-    if (ddepth == CV_64F)
+    if(ddepth == CV_64F)
    {
        ippFunc =
            stype == CV_8UC1 ? (ippiSum)ippiSum_8u_C1R :
@ -3401,42 +3396,47 @@ static inline void reduceSumC_8u16u16s32f_64f(const cv::Mat& srcmat, cv::Mat& ds
            stype == CV_32FC1 ? (ippiSumHint)ippiSum_32f_C1R :
            stype == CV_32FC3 ? (ippiSumHint)ippiSum_32f_C3R :
            stype == CV_32FC4 ? (ippiSumHint)ippiSum_32f_C4R : 0;
-        func =
-            sdepth == CV_8U ? (cv::ReduceFunc)cv::reduceC_<uchar, double,   cv::OpAdd<double> > :
-            sdepth == CV_16U ? (cv::ReduceFunc)cv::reduceC_<ushort, double,   cv::OpAdd<double> > :
-            sdepth == CV_16S ? (cv::ReduceFunc)cv::reduceC_<short, double,   cv::OpAdd<double> > :
-            sdepth == CV_32F ? (cv::ReduceFunc)cv::reduceC_<float, double,   cv::OpAdd<double> > : 0;
    }
-    CV_Assert(!(ippFunc && ippFuncHint) && func);

-    CV_IPP_CHECK()
+    if(ippFunc)
    {
-        if (ippFunc)
+        for(int y = 0; y < srcmat.size().height; y++)
        {
-            for (int y = 0; y < size.height; ++y)
-                if (ippFunc(srcmat.ptr(y), sstep, roisize, dstmat.ptr<Ipp64f>(y)) < 0)
-                {
-                    setIppErrorStatus();
-                    cv::Mat dstroi = dstmat.rowRange(y, y + 1);
-                    func(srcmat.rowRange(y, y + 1), dstroi);
-                }
-            CV_IMPL_ADD(CV_IMPL_IPP);
-            return;
+            if(ippFunc(srcmat.ptr(y), sstep, roisize, dstmat.ptr<Ipp64f>(y)) < 0)
+                return false;
        }
-        else if (ippFuncHint)
+        return true;
+    }
+    else if(ippFuncHint)
+    {
+        for(int y = 0; y < srcmat.size().height; y++)
        {
-            for (int y = 0; y < size.height; ++y)
-                if (ippFuncHint(srcmat.ptr(y), sstep, roisize, dstmat.ptr<Ipp64f>(y), ippAlgHintAccurate) < 0)
-                {
-                    setIppErrorStatus();
-                    cv::Mat dstroi = dstmat.rowRange(y, y + 1);
-                    func(srcmat.rowRange(y, y + 1), dstroi);
-                }
-            CV_IMPL_ADD(CV_IMPL_IPP);
-            return;
+            if(ippFuncHint(srcmat.ptr(y), sstep, roisize, dstmat.ptr<Ipp64f>(y), ippAlgHintAccurate) < 0)
+                return false;
        }
+        return true;
    }

+    return false;
+}
+
+static inline void reduceSumC_8u16u16s32f_64f(const cv::Mat& srcmat, cv::Mat& dstmat)
+{
+    CV_IPP_RUN(true, ipp_reduceSumC_8u16u16s32f_64f(srcmat, dstmat));
+
+    cv::ReduceFunc func = 0;
+
+    if(dstmat.depth() == CV_64F)
+    {
+        int sdepth = CV_MAT_DEPTH(srcmat.type());
+        func =
+            sdepth == CV_8U ? (cv::ReduceFunc)cv::reduceC_<uchar, double,   cv::OpAdd<double> > :
+            sdepth == CV_16U ? (cv::ReduceFunc)cv::reduceC_<ushort, double,   cv::OpAdd<double> > :
+            sdepth == CV_16S ? (cv::ReduceFunc)cv::reduceC_<short, double,   cv::OpAdd<double> > :
+            sdepth == CV_32F ? (cv::ReduceFunc)cv::reduceC_<float, double,   cv::OpAdd<double> > : 0;
+    }
+    CV_Assert(func);
+
    func(srcmat, dstmat);
 }

@ -3449,7 +3449,7 @@ static inline void reduceSumC_8u16u16s32f_64f(const cv::Mat& srcmat, cv::Mat& ds
 #define reduceSumC32f32f reduceC_<float, float, OpAdd<float> >
 #define reduceSumC64f64f reduceC_<double,double,OpAdd<double> >

-#if IPP_VERSION_X100 > 0
+#ifdef HAVE_IPP
 #define reduceSumC8u64f  reduceSumC_8u16u16s32f_64f
 #define reduceSumC16u64f reduceSumC_8u16u16s32f_64f
 #define reduceSumC16s64f reduceSumC_8u16u16s32f_64f
@ -3461,35 +3461,32 @@ static inline void reduceSumC_8u16u16s32f_64f(const cv::Mat& srcmat, cv::Mat& ds
 #define reduceSumC32f64f reduceC_<float, double,OpAdd<double> >
 #endif

-#if IPP_VERSION_X100 > 0
+#ifdef HAVE_IPP
 #define REDUCE_OP(favor, optype, type1, type2) \
-static inline void reduce##optype##C##favor(const cv::Mat& srcmat, cv::Mat& dstmat) \
+static inline bool ipp_reduce##optype##C##favor(const cv::Mat& srcmat, cv::Mat& dstmat) \
 { \
-    typedef Ipp##favor IppType; \
-    cv::Size size = srcmat.size(); \
-    IppiSize roisize = ippiSize(size.width, 1);\
-    int sstep = (int)srcmat.step; \
-     \
-    if (CV_IPP_CHECK_COND && (srcmat.channels() == 1)) \
+    if((srcmat.channels() == 1)) \
    { \
-        for (int y = 0; y < size.height; ++y) \
-            if (ippi##optype##_##favor##_C1R(srcmat.ptr<IppType>(y), sstep, roisize, dstmat.ptr<IppType>(y)) < 0) \
-            { \
-                setIppErrorStatus(); \
-                cv::Mat dstroi = dstmat.rowRange(y, y + 1); \
-                cv::reduceC_ < type1, type2, cv::Op##optype < type2 > >(srcmat.rowRange(y, y + 1), dstroi); \
-            } \
-            else \
-            { \
-                CV_IMPL_ADD(CV_IMPL_IPP);\
-            } \
-        return; \
-    } \
+        int sstep = (int)srcmat.step; \
+        typedef Ipp##favor IppType; \
+        IppiSize roisize = ippiSize(srcmat.size().width, 1);\
+        for(int y = 0; y < srcmat.size().height; y++)\
+        {\
+            if(ippi##optype##_##favor##_C1R(srcmat.ptr<IppType>(y), sstep, roisize, dstmat.ptr<IppType>(y)) < 0)\
+                return false;\
+        }\
+        return true;\
+    }\
+    return false; \
+} \
+static inline void reduce##optype##C##favor(const cv::Mat& srcmat, cv::Mat& dstmat) \
+{ \
+    CV_IPP_RUN(true, ipp_reduce##optype##C##favor(srcmat, dstmat)); \
    cv::reduceC_ < type1, type2, cv::Op##optype < type2 > >(srcmat, dstmat); \
 }
 #endif

-#if IPP_VERSION_X100 > 0
+#ifdef HAVE_IPP
 REDUCE_OP(8u, Max, uchar, uchar)
 REDUCE_OP(16u, Max, ushort, ushort)
 REDUCE_OP(16s, Max, short, short)
@ -3502,7 +3499,7 @@ REDUCE_OP(32f, Max, float, float)
 #endif
 #define reduceMaxC64f reduceC_<double,double,OpMax<double> >

-#if IPP_VERSION_X100 > 0
+#ifdef HAVE_IPP
 REDUCE_OP(8u, Min, uchar, uchar)
 REDUCE_OP(16u, Min, ushort, ushort)
 REDUCE_OP(16s, Min, short, short)
@ -3579,8 +3576,8 @@ static bool ocl_reduce(InputArray _src, OutputArray _dst,
            k.args(ocl::KernelArg::ReadOnly(src),
                      ocl::KernelArg::WriteOnlyNoSize(dst));

-        size_t localSize[2] = { buf_cols, tileHeight};
-        size_t globalSize[2] = { buf_cols, src.rows };
+        size_t localSize[2] = { (size_t)buf_cols, (size_t)tileHeight};
+        size_t globalSize[2] = { (size_t)buf_cols, (size_t)src.rows };
        return k.run(2, globalSize, localSize, false);
    }
    else
@ -3775,7 +3772,7 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype)
 namespace cv
 {

-#if IPP_VERSION_X100 > 0
+#ifdef HAVE_IPP
 #define USE_IPP_SORT

 typedef IppStatus (CV_STDCALL * IppSortFunc)(void *, int);
@ -3785,18 +3782,24 @@ static IppSortFunc getSortFunc(int depth, bool sortDescending)
 {
    if (!sortDescending)
        return depth == CV_8U ? (IppSortFunc)ippsSortAscend_8u_I :
-            /*depth == CV_16U ? (IppSortFunc)ippsSortAscend_16u_I :
+#if IPP_DISABLE_BLOCK
+            depth == CV_16U ? (IppSortFunc)ippsSortAscend_16u_I :
            depth == CV_16S ? (IppSortFunc)ippsSortAscend_16s_I :
            depth == CV_32S ? (IppSortFunc)ippsSortAscend_32s_I :
            depth == CV_32F ? (IppSortFunc)ippsSortAscend_32f_I :
-            depth == CV_64F ? (IppSortFunc)ippsSortAscend_64f_I :*/ 0;
+            depth == CV_64F ? (IppSortFunc)ippsSortAscend_64f_I :
+#endif
+            0;
    else
        return depth == CV_8U ? (IppSortFunc)ippsSortDescend_8u_I :
-            /*depth == CV_16U ? (IppSortFunc)ippsSortDescend_16u_I :
+#if IPP_DISABLE_BLOCK
+            depth == CV_16U ? (IppSortFunc)ippsSortDescend_16u_I :
            depth == CV_16S ? (IppSortFunc)ippsSortDescend_16s_I :
            depth == CV_32S ? (IppSortFunc)ippsSortDescend_32s_I :
            depth == CV_32F ? (IppSortFunc)ippsSortDescend_32f_I :
-            depth == CV_64F ? (IppSortFunc)ippsSortDescend_64f_I :*/ 0;
+            depth == CV_64F ? (IppSortFunc)ippsSortDescend_64f_I :
+#endif
+            0;
 }

 static IppFlipFunc getFlipFunc(int depth)
@ -3911,7 +3914,7 @@ public:
    const _Tp* arr;
 };

-#if defined USE_IPP_SORT && 0
+#if defined USE_IPP_SORT && IPP_DISABLE_BLOCK

 typedef IppStatus (CV_STDCALL *IppSortIndexFunc)(void *, int *, int);

@ -3958,7 +3961,7 @@ template<typename T> static void sortIdx_( const Mat& src, Mat& dst, int flags )
    bptr = (T*)buf;
    _iptr = (int*)ibuf;

-#if defined USE_IPP_SORT && 0
+#if defined USE_IPP_SORT && IPP_DISABLE_BLOCK
    int depth = src.depth();
    IppSortIndexFunc ippFunc = 0;
    IppFlipFunc ippFlipFunc = 0;
@ -3987,27 +3990,27 @@ template<typename T> static void sortIdx_( const Mat& src, Mat& dst, int flags )
        for( j = 0; j < len; j++ )
            iptr[j] = j;

-#if defined USE_IPP_SORT && 0
+#if defined USE_IPP_SORT && IPP_DISABLE_BLOCK
        if (sortRows || !ippFunc || ippFunc(ptr, iptr, len) < 0)
 #endif
        {
-#if defined USE_IPP_SORT && 0
+#if defined USE_IPP_SORT && IPP_DISABLE_BLOCK
            setIppErrorStatus();
 #endif
            std::sort( iptr, iptr + len, LessThanIdx<T>(ptr) );
            if( sortDescending )
            {
-#if defined USE_IPP_SORT && 0
+#if defined USE_IPP_SORT && IPP_DISABLE_BLOCK
                if (!ippFlipFunc || ippFlipFunc(iptr, len) < 0)
 #endif
                {
-#if defined USE_IPP_SORT && 0
+#if defined USE_IPP_SORT && IPP_DISABLE_BLOCK
                    setIppErrorStatus();
 #endif
                    for( j = 0; j < len/2; j++ )
                        std::swap(iptr[j], iptr[len-1-j]);
                }
-#if defined USE_IPP_SORT && 0
+#if defined USE_IPP_SORT && IPP_DISABLE_BLOCK
                else
                {
                    CV_IMPL_ADD(CV_IMPL_IPP);
@ -4015,7 +4018,7 @@ template<typename T> static void sortIdx_( const Mat& src, Mat& dst, int flags )
 #endif
            }
        }
-#if defined USE_IPP_SORT && 0
+#if defined USE_IPP_SORT && IPP_DISABLE_BLOCK
        else
        {
            CV_IMPL_ADD(CV_IMPL_IPP);
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@ -4453,8 +4453,11 @@ public:
 #endif
            {
                tempUMatFlags = UMatData::TEMP_UMAT;
-                handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
-                                           u->size, u->origdata, &retval);
+                if (u->origdata == cv::alignPtr(u->origdata, 4)) // There are OpenCL runtime issues for less aligned data
+                {
+                    handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
+                                            u->size, u->origdata, &retval);
+                }
                if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST))
                {
                    handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
@ -4510,16 +4513,17 @@ public:
        if(!u)
            return;

-        CV_Assert(u->urefcount >= 0);
-        CV_Assert(u->refcount >= 0);
+        CV_Assert(u->urefcount == 0);
+        CV_Assert(u->refcount == 0 && "UMat deallocation error: some derived Mat is still alive");

-        CV_Assert(u->handle != 0 && u->urefcount == 0);
+        CV_Assert(u->handle != 0);
+        CV_Assert(u->mapcount == 0);
        if(u->tempUMat())
        {
            CV_Assert(u->origdata);
 //            UMatDataAutoLock lock(u);

-            if( u->hostCopyObsolete() && u->refcount > 0 )
+            if (u->hostCopyObsolete())
            {
 #ifdef HAVE_OPENCL_SVM
                if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
@ -4572,16 +4576,29 @@ public:
                    else
                    {
                        cl_int retval = 0;
-                        void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
-                                                        (CL_MAP_READ | CL_MAP_WRITE),
-                                                        0, u->size, 0, 0, 0, &retval);
-                        CV_OclDbgAssert(retval == CL_SUCCESS);
-                        CV_OclDbgAssert(clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0) == CL_SUCCESS);
-                        CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
+                        if (u->tempUMat())
+                        {
+                            CV_Assert(u->mapcount == 0);
+                            void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
+                                (CL_MAP_READ | CL_MAP_WRITE),
+                                0, u->size, 0, 0, 0, &retval);
+                            CV_Assert(u->origdata == data);
+                            CV_OclDbgAssert(retval == CL_SUCCESS);
+                            if (u->originalUMatData)
+                            {
+                                CV_Assert(u->originalUMatData->data == data);
+                            }
+                            CV_OclDbgAssert(clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0) == CL_SUCCESS);
+                            CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
+                        }
                    }
                }
                u->markHostCopyObsolete(false);
            }
+            else
+            {
+                // nothing
+            }
 #ifdef HAVE_OPENCL_SVM
            if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
            {
@ -4607,16 +4624,12 @@ public:
            if(u->data && u->copyOnMap() && u->data != u->origdata)
                fastFree(u->data);
            u->data = u->origdata;
-            if(u->refcount == 0)
-            {
-                u->currAllocator->deallocate(u);
-                u = NULL;
-            }
+            u->currAllocator->deallocate(u);
+            u = NULL;
        }
        else
        {
            CV_Assert(u->origdata == NULL);
-            CV_Assert(u->refcount == 0);
            if(u->data && u->copyOnMap() && u->data != u->origdata)
            {
                fastFree(u->data);
@ -4665,17 +4678,13 @@ public:
            delete u;
            u = NULL;
        }
-        CV_Assert(u == NULL || u->refcount);
+        CV_Assert(u == NULL);
    }

+    // synchronized call (external UMatDataAutoLock, see UMat::getMat)
    void map(UMatData* u, int accessFlags) const
    {
-        if(!u)
-            return;
-
-        CV_Assert( u->handle != 0 );
-
-        UMatDataAutoLock autolock(u);
+        CV_Assert(u && u->handle);

        if(accessFlags & ACCESS_WRITE)
            u->markDeviceCopyObsolete(true);
@ -4715,11 +4724,16 @@ public:
                }
 #endif

-                cl_int retval = 0;
-                u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
-                                                     (CL_MAP_READ | CL_MAP_WRITE),
-                                                     0, u->size, 0, 0, 0, &retval);
-                if(u->data && retval == CL_SUCCESS)
+                cl_int retval = CL_SUCCESS;
+                if (!u->deviceMemMapped())
+                {
+                    CV_Assert(u->refcount == 1);
+                    CV_Assert(u->mapcount++ == 0);
+                    u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
+                                                         (CL_MAP_READ | CL_MAP_WRITE),
+                                                         0, u->size, 0, 0, 0, &retval);
+                }
+                if (u->data && retval == CL_SUCCESS)
                {
                    u->markHostCopyObsolete(false);
                    u->markDeviceMemMapped(true);
@ -4765,7 +4779,6 @@ public:
        if( !u->copyOnMap() && u->deviceMemMapped() )
        {
            CV_Assert(u->data != NULL);
-            u->markDeviceMemMapped(false);
 #ifdef HAVE_OPENCL_SVM
            if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
            {
@ -4792,16 +4805,21 @@ public:
                return;
            }
 #endif
-            CV_Assert( (retval = clEnqueueUnmapMemObject(q,
-                                (cl_mem)u->handle, u->data, 0, 0, 0)) == CL_SUCCESS );
-            if (Device::getDefault().isAMD())
-            {
-                // required for multithreaded applications (see stitching test)
-                CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
-            }
-
            if (u->refcount == 0)
+            {
+                CV_Assert(u->mapcount-- == 1);
+                CV_Assert((retval = clEnqueueUnmapMemObject(q,
+                          (cl_mem)u->handle, u->data, 0, 0, 0)) == CL_SUCCESS);
+                if (Device::getDefault().isAMD())
+                {
+                    // required for multithreaded applications (see stitching test)
+                    CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
+                }
+                u->markDeviceMemMapped(false);
                u->data = 0;
+                u->markDeviceCopyObsolete(false);
+                u->markHostCopyObsolete(true);
+            }
        }
        else if( u->copyOnMap() && u->deviceCopyObsolete() )
        {
@ -4811,9 +4829,9 @@ public:
 #endif
            CV_Assert( (retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
                                u->size, alignedPtr.getAlignedPtr(), 0, 0, 0)) == CL_SUCCESS );
+            u->markDeviceCopyObsolete(false);
+            u->markHostCopyObsolete(true);
        }
-        u->markDeviceCopyObsolete(false);
-        u->markHostCopyObsolete(true);
    }

    bool checkContinuous(int dims, const size_t sz[],
--- a/modules/core/src/parallel_pthreads.cpp
+++ b/modules/core/src/parallel_pthreads.cpp
@ -304,14 +304,18 @@ void ForThread::stop()
 {
    if(m_state == eFTStarted)
    {
+        pthread_mutex_lock(&m_thread_mutex);
        m_state = eFTToStop;
+        pthread_mutex_unlock(&m_thread_mutex);

        run();

        pthread_join(m_posix_thread, NULL);
    }

+    pthread_mutex_lock(&m_thread_mutex);
    m_state = eFTStoped;
+    pthread_mutex_unlock(&m_thread_mutex);
 }

 void ForThread::run()
--- a/modules/core/src/precomp.hpp
+++ b/modules/core/src/precomp.hpp
@ -50,7 +50,7 @@
 #include "opencv2/core/core_c.h"
 #include "opencv2/core/cuda.hpp"
 #include "opencv2/core/opengl.hpp"
-#include "opencv2/core/vaapi.hpp"
+#include "opencv2/core/va_intel.hpp"

 #include "opencv2/core/private.hpp"
 #include "opencv2/core/private.cuda.hpp"
@ -206,7 +206,7 @@ extern volatile bool USE_AVX2;

 enum { BLOCK_SIZE = 1024 };

-#if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7)
+#if defined HAVE_IPP && (IPP_VERSION_X100 >= 700)
 #define ARITHM_USE_IPP 1
 #else
 #define ARITHM_USE_IPP 0
--- a/modules/core/src/stat.cpp
+++ b/modules/core/src/stat.cpp
@ -1141,7 +1141,7 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask
 #ifdef HAVE_IPP
 static bool ipp_sum(Mat &src, Scalar &_res)
 {
-#if IPP_VERSION_MAJOR >= 7
+#if IPP_VERSION_X100 >= 700
    int cn = src.channels();
    size_t total_size = src.total();
    int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
@ -1203,7 +1203,7 @@ cv::Scalar cv::sum( InputArray _src )
 #endif

    Mat src = _src.getMat();
-    CV_IPP_RUN(IPP_VERSION_MAJOR >= 7, ipp_sum(src, _res), _res);
+    CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_sum(src, _res), _res);

    int k, cn = src.channels(), depth = src.depth();
    SumFunc func = getSumFunc(depth);
@ -1368,101 +1368,107 @@ int cv::countNonZero( InputArray _src )
    return nz;
 }

-cv::Scalar cv::mean( InputArray _src, InputArray _mask )
+#if defined HAVE_IPP
+namespace cv
 {
-    Mat src = _src.getMat(), mask = _mask.getMat();
-    CV_Assert( mask.empty() || mask.type() == CV_8U );
-
-    int k, cn = src.channels(), depth = src.depth();
-
-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
-    CV_IPP_CHECK()
+static bool ipp_mean( Mat &src, Mat &mask, Scalar &ret ) // IPP helper invoked from cv::mean: on success writes the mean into ret and returns true
+{
+#if IPP_VERSION_X100 >= 700
+    size_t total_size = src.total();
+    int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
+    if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) ) // plain 2-D input, or continuous data whose element count equals rows*cols
     {
-        size_t total_size = src.total();
-        int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
-        if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
+        IppiSize sz = { cols, rows };
+        int type = src.type();
+        if( !mask.empty() ) // masked branch: only the C1 and C3 variants of 8U/16U/32F are looked up
         {
-            IppiSize sz = { cols, rows };
-            int type = src.type();
-            if( !mask.empty() )
+            typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
+            ippiMaskMeanFuncC1 ippFuncC1 =
+            type == CV_8UC1 ? (ippiMaskMeanFuncC1)ippiMean_8u_C1MR :
+            type == CV_16UC1 ? (ippiMaskMeanFuncC1)ippiMean_16u_C1MR :
+            type == CV_32FC1 ? (ippiMaskMeanFuncC1)ippiMean_32f_C1MR :
+            0;
+            if( ippFuncC1 )
             {
-                typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
-                ippiMaskMeanFuncC1 ippFuncC1 =
-                type == CV_8UC1 ? (ippiMaskMeanFuncC1)ippiMean_8u_C1MR :
-                type == CV_16UC1 ? (ippiMaskMeanFuncC1)ippiMean_16u_C1MR :
-                type == CV_32FC1 ? (ippiMaskMeanFuncC1)ippiMean_32f_C1MR :
-                0;
-                if( ippFuncC1 )
+                Ipp64f res;
+                if( ippFuncC1(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &res) >= 0 )
                 {
-                    Ipp64f res;
-                    if( ippFuncC1(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &res) >= 0 )
-                    {
-                        CV_IMPL_ADD(CV_IMPL_IPP);
-                        return Scalar(res);
-                    }
-                    setIppErrorStatus();
+                    ret = Scalar(res);
+                    return true;
                 }
-                typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
-                ippiMaskMeanFuncC3 ippFuncC3 =
-                type == CV_8UC3 ? (ippiMaskMeanFuncC3)ippiMean_8u_C3CMR :
-                type == CV_16UC3 ? (ippiMaskMeanFuncC3)ippiMean_16u_C3CMR :
-                type == CV_32FC3 ? (ippiMaskMeanFuncC3)ippiMean_32f_C3CMR :
-                0;
-                if( ippFuncC3 )
+            }
+            typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
+            ippiMaskMeanFuncC3 ippFuncC3 =
+            type == CV_8UC3 ? (ippiMaskMeanFuncC3)ippiMean_8u_C3CMR :
+            type == CV_16UC3 ? (ippiMaskMeanFuncC3)ippiMean_16u_C3CMR :
+            type == CV_32FC3 ? (ippiMaskMeanFuncC3)ippiMean_32f_C3CMR :
+            0;
+            if( ippFuncC3 )
+            {
+                Ipp64f res1, res2, res3;
+                if( ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &res1) >= 0 && // three calls with index 1, 2, 3 -- presumably the IPP channel-of-interest; confirm against the IPP reference
+                    ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &res2) >= 0 &&
+                    ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &res3) >= 0 )
                 {
-                    Ipp64f res1, res2, res3;
-                    if( ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &res1) >= 0 &&
-                        ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &res2) >= 0 &&
-                        ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &res3) >= 0 )
-                    {
-                        CV_IMPL_ADD(CV_IMPL_IPP);
-                        return Scalar(res1, res2, res3);
-                    }
-                    setIppErrorStatus();
+                    ret = Scalar(res1, res2, res3);
+                    return true;
                 }
             }
-            else
+        }
+        else
+        {
+            typedef IppStatus (CV_STDCALL* ippiMeanFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
+            typedef IppStatus (CV_STDCALL* ippiMeanFuncNoHint)(const void*, int, IppiSize, double *);
+            ippiMeanFuncHint ippFuncHint =
+                type == CV_32FC1 ? (ippiMeanFuncHint)ippiMean_32f_C1R :
+                type == CV_32FC3 ? (ippiMeanFuncHint)ippiMean_32f_C3R :
+                type == CV_32FC4 ? (ippiMeanFuncHint)ippiMean_32f_C4R :
+                0;
+            ippiMeanFuncNoHint ippFuncNoHint =
+                type == CV_8UC1 ? (ippiMeanFuncNoHint)ippiMean_8u_C1R :
+                type == CV_8UC3 ? (ippiMeanFuncNoHint)ippiMean_8u_C3R :
+                type == CV_8UC4 ? (ippiMeanFuncNoHint)ippiMean_8u_C4R :
+                type == CV_16UC1 ? (ippiMeanFuncNoHint)ippiMean_16u_C1R :
+                type == CV_16UC3 ? (ippiMeanFuncNoHint)ippiMean_16u_C3R :
+                type == CV_16UC4 ? (ippiMeanFuncNoHint)ippiMean_16u_C4R :
+                type == CV_16SC1 ? (ippiMeanFuncNoHint)ippiMean_16s_C1R :
+                type == CV_16SC3 ? (ippiMeanFuncNoHint)ippiMean_16s_C3R :
+                type == CV_16SC4 ? (ippiMeanFuncNoHint)ippiMean_16s_C4R :
+                0;
+            // Make sure only zero or one version of the function pointer is valid
+            CV_Assert(!ippFuncHint || !ippFuncNoHint);
+            if( ippFuncHint || ippFuncNoHint )
             {
-                typedef IppStatus (CV_STDCALL* ippiMeanFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
-                typedef IppStatus (CV_STDCALL* ippiMeanFuncNoHint)(const void*, int, IppiSize, double *);
-                ippiMeanFuncHint ippFuncHint =
-                    type == CV_32FC1 ? (ippiMeanFuncHint)ippiMean_32f_C1R :
-                    type == CV_32FC3 ? (ippiMeanFuncHint)ippiMean_32f_C3R :
-                    type == CV_32FC4 ? (ippiMeanFuncHint)ippiMean_32f_C4R :
-                    0;
-                ippiMeanFuncNoHint ippFuncNoHint =
-                    type == CV_8UC1 ? (ippiMeanFuncNoHint)ippiMean_8u_C1R :
-                    type == CV_8UC3 ? (ippiMeanFuncNoHint)ippiMean_8u_C3R :
-                    type == CV_8UC4 ? (ippiMeanFuncNoHint)ippiMean_8u_C4R :
-                    type == CV_16UC1 ? (ippiMeanFuncNoHint)ippiMean_16u_C1R :
-                    type == CV_16UC3 ? (ippiMeanFuncNoHint)ippiMean_16u_C3R :
-                    type == CV_16UC4 ? (ippiMeanFuncNoHint)ippiMean_16u_C4R :
-                    type == CV_16SC1 ? (ippiMeanFuncNoHint)ippiMean_16s_C1R :
-                    type == CV_16SC3 ? (ippiMeanFuncNoHint)ippiMean_16s_C3R :
-                    type == CV_16SC4 ? (ippiMeanFuncNoHint)ippiMean_16s_C4R :
-                    0;
-                // Make sure only zero or one version of the function pointer is valid
-                CV_Assert(!ippFuncHint || !ippFuncNoHint);
-                if( ippFuncHint || ippFuncNoHint )
+                Ipp64f res[4];
+                IppStatus status = ippFuncHint ? ippFuncHint(src.ptr(), (int)src.step[0], sz, res, ippAlgHintAccurate) :
+                                ippFuncNoHint(src.ptr(), (int)src.step[0], sz, res);
+                if( status >= 0 )
                 {
-                    Ipp64f res[4];
-                    IppStatus ret = ippFuncHint ? ippFuncHint(src.ptr(), (int)src.step[0], sz, res, ippAlgHintAccurate) :
-                                    ippFuncNoHint(src.ptr(), (int)src.step[0], sz, res);
-                    if( ret >= 0 )
-                    {
-                        Scalar sc;
-                        for( int i = 0; i < cn; i++ )
-                            sc[i] = res[i];
-                        CV_IMPL_ADD(CV_IMPL_IPP);
-                        return sc;
-                    }
-                    setIppErrorStatus();
+                    for( int i = 0; i < src.channels(); i++ ) // only the first src.channels() components of ret are overwritten
+                        ret[i] = res[i];
+                    return true;
                 }
             }
         }
     }
+    return false; // layout not accepted, no matching IPP function, or the IPP call returned a negative status
+#else
+    return false; // IPP_VERSION_X100 < 700: the helper never reports success
+#endif
+}
+}
 #endif

+cv::Scalar cv::mean( InputArray _src, InputArray _mask )
+{
+    Mat src = _src.getMat(), mask = _mask.getMat();
+    CV_Assert( mask.empty() || mask.type() == CV_8U );
+
+    int k, cn = src.channels(), depth = src.depth();
+    Scalar s;
+
+    CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_mean(src, mask, s), s)
+
    SumFunc func = getSumFunc(depth);

    CV_Assert( cn <= 4 && func != 0 );
@ -1470,7 +1476,6 @@ cv::Scalar cv::mean( InputArray _src, InputArray _mask )
    const Mat* arrays[] = {&src, &mask, 0};
    uchar* ptrs[2];
    NAryMatIterator it(arrays, ptrs);
-    Scalar s;
    int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
    int j, count = 0;
    AutoBuffer<int> _buf;
@ -1640,7 +1645,7 @@ namespace cv
 {
 static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& mask)
 {
-#if IPP_VERSION_MAJOR >= 7
+#if IPP_VERSION_X100 >= 700
    int cn = src.channels();
    size_t total_size = src.total();
    int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
@ -1712,7 +1717,7 @@ static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& m
            ippiMeanStdDevFuncC1 ippFuncC1 =
            type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R :
            type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R :
-#if (IPP_VERSION_X100 >= 801)
+#if (IPP_VERSION_X100 >= 810)
            type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0
 #endif
            0;
@ -1756,7 +1761,7 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input
    Mat src = _src.getMat(), mask = _mask.getMat();
    CV_Assert( mask.empty() || mask.type() == CV_8UC1 );

-    CV_IPP_RUN(IPP_VERSION_MAJOR >= 7, ipp_meanStdDev(src, _mean, _sdv, mask));
+    CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_meanStdDev(src, _mean, _sdv, mask));

    int k, cn = src.channels(), depth = src.depth();

@ -2212,7 +2217,7 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int*
 #ifdef HAVE_IPP
 static bool ipp_minMaxIdx( Mat &src, double* minVal, double* maxVal, int* minIdx, int* maxIdx, Mat &mask)
 {
-#if IPP_VERSION_MAJOR >= 7
+#if IPP_VERSION_X100 >= 700
    int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    size_t total_size = src.total();
    int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
@ -2228,7 +2233,9 @@ static bool ipp_minMaxIdx( Mat &src, double* minVal, double* maxVal, int* minIdx
            CV_SUPPRESS_DEPRECATED_START
            ippiMaskMinMaxIndxFuncC1 ippFuncC1 =
                type == CV_8UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1MR :
+#if IPP_VERSION_X100 < 900
                type == CV_8SC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_8s_C1MR :
+#endif
                type == CV_16UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1MR :
                type == CV_32FC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1MR : 0;
            CV_SUPPRESS_DEPRECATED_END
@ -2265,8 +2272,12 @@ static bool ipp_minMaxIdx( Mat &src, double* minVal, double* maxVal, int* minIdx

            CV_SUPPRESS_DEPRECATED_START
            ippiMinMaxIndxFuncC1 ippFuncC1 =
+#if IPP_VERSION_X100 != 900 // bug in 9.0.0 avx2 optimization
                depth == CV_8U ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1R :
+#endif
+#if IPP_VERSION_X100 < 900
                depth == CV_8S ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_8s_C1R :
+#endif
                depth == CV_16U ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1R :
 #if !((defined _MSC_VER && defined _M_IX86) || defined __i386__)
                depth == CV_32F ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1R :
@ -2320,7 +2331,7 @@ void cv::minMaxIdx(InputArray _src, double* minVal,
               ocl_minMaxIdx(_src, minVal, maxVal, minIdx, maxIdx, _mask))

    Mat src = _src.getMat(), mask = _mask.getMat();
-    CV_IPP_RUN(IPP_VERSION_MAJOR >= 7, ipp_minMaxIdx(src, minVal, maxVal, minIdx, maxIdx, mask))
+    CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_minMaxIdx(src, minVal, maxVal, minIdx, maxIdx, mask))

    MinMaxIdxFunc func = getMinmaxTab(depth);
    CV_Assert( func != 0 );
@ -2331,8 +2342,8 @@ void cv::minMaxIdx(InputArray _src, double* minVal,

    size_t minidx = 0, maxidx = 0;
    int iminval = INT_MAX, imaxval = INT_MIN;
-    float fminval = FLT_MAX, fmaxval = -FLT_MAX;
-    double dminval = DBL_MAX, dmaxval = -DBL_MAX;
+    float  fminval = std::numeric_limits<float>::infinity(),  fmaxval = -fminval;
+    double dminval = std::numeric_limits<double>::infinity(), dmaxval = -dminval;
    size_t startidx = 1;
    int *minval = &iminval, *maxval = &imaxval;
    int planeSize = (int)it.size*cn;
@ -2345,6 +2356,14 @@ void cv::minMaxIdx(InputArray _src, double* minVal,
    for( size_t i = 0; i < it.nplanes; i++, ++it, startidx += planeSize )
        func( ptrs[0], ptrs[1], minval, maxval, &minidx, &maxidx, planeSize, startidx );

+    if (!src.empty() && mask.empty())
+    {
+        if( minidx == 0 )
+             minidx = 1;
+         if( maxidx == 0 )
+             maxidx = 1;
+    }
+
    if( minidx == 0 )
        dminval = dmaxval = 0;
    else if( depth == CV_32F )
@ -2645,7 +2664,7 @@ static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double &
 #ifdef HAVE_IPP
 static bool ipp_norm(Mat &src, int normType, Mat &mask, double &result)
 {
-#if IPP_VERSION_MAJOR >= 7
+#if IPP_VERSION_X100 >= 700
    int cn = src.channels();
    size_t total_size = src.total();
    int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
@ -2663,19 +2682,25 @@ static bool ipp_norm(Mat &src, int normType, Mat &mask, double &result)
            ippiMaskNormFuncC1 ippFuncC1 =
                normType == NORM_INF ?
                (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8u_C1MR :
+#if IPP_VERSION_X100 < 900
                type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8s_C1MR :
+#endif
 //                type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_16u_C1MR :
                type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_32f_C1MR :
                0) :
            normType == NORM_L1 ?
                (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8u_C1MR :
+#if IPP_VERSION_X100 < 900
                type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8s_C1MR :
+#endif
                type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_16u_C1MR :
                type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_32f_C1MR :
                0) :
            normType == NORM_L2 || normType == NORM_L2SQR ?
                (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8u_C1MR :
+#if IPP_VERSION_X100 < 900
                type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8s_C1MR :
+#endif
                type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_16u_C1MR :
                type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_32f_C1MR :
                0) : 0;
@ -2688,7 +2713,8 @@ static bool ipp_norm(Mat &src, int normType, Mat &mask, double &result)
                    return true;
                }
            }
-            /*typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
+#if IPP_DISABLE_BLOCK
+            typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
            ippiMaskNormFuncC3 ippFuncC3 =
                normType == NORM_INF ?
                (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8u_C3CMR :
@ -2723,7 +2749,8 @@ static bool ipp_norm(Mat &src, int normType, Mat &mask, double &result)
                    result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
                    return true;
                }
-            }*/
+            }
+#endif
        }
        else
        {
@ -2749,7 +2776,7 @@ static bool ipp_norm(Mat &src, int normType, Mat &mask, double &result)
                type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C3R :
                type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C4R :
                type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C1R :
-#if (IPP_VERSION_X100 >= 801)
+#if (IPP_VERSION_X100 >= 810)
                type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
                type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
 #endif
@ -2829,7 +2856,7 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
 #endif

    Mat src = _src.getMat(), mask = _mask.getMat();
-    CV_IPP_RUN(IPP_VERSION_MAJOR >= 7, ipp_norm(src, normType, mask, _result), _result);
+    CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_norm(src, normType, mask, _result), _result);

    int depth = src.depth(), cn = src.channels();
    if( src.isContinuous() && mask.empty() )
@ -3033,7 +3060,7 @@ namespace cv
 {
 static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArray _mask, double &result)
 {
-#if IPP_VERSION_MAJOR >= 7
+#if IPP_VERSION_X100 >= 700
    Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();

    if( normType & CV_RELATIVE )
@ -3056,23 +3083,29 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra
                ippiMaskNormRelFuncC1 ippFuncC1 =
                    normType == NORM_INF ?
                    (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8u_C1MR :
+#if IPP_VERSION_X100 < 900
 #ifndef __APPLE__
                    type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8s_C1MR :
+#endif
 #endif
                    type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_16u_C1MR :
                    type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_32f_C1MR :
                    0) :
                    normType == NORM_L1 ?
                    (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8u_C1MR :
+#if IPP_VERSION_X100 < 900
 #ifndef __APPLE__
                    type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8s_C1MR :
+#endif
 #endif
                    type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_16u_C1MR :
                    type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_32f_C1MR :
                    0) :
                    normType == NORM_L2 || normType == NORM_L2SQR ?
                    (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8u_C1MR :
+#if IPP_VERSION_X100 < 900
                    type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8s_C1MR :
+#endif
                    type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_16u_C1MR :
                    type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_32f_C1MR :
                    0) : 0;
@ -3157,21 +3190,27 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra
            ippiMaskNormDiffFuncC1 ippFuncC1 =
                normType == NORM_INF ?
                (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8u_C1MR :
+#if IPP_VERSION_X100 < 900
                type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8s_C1MR :
+#endif
                type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_16u_C1MR :
                type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_32f_C1MR :
                0) :
                normType == NORM_L1 ?
                (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8u_C1MR :
+#if IPP_VERSION_X100 < 900
 #ifndef __APPLE__
                type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8s_C1MR :
+#endif
 #endif
                type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_16u_C1MR :
                type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_32f_C1MR :
                0) :
                normType == NORM_L2 || normType == NORM_L2SQR ?
                (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8u_C1MR :
+#if IPP_VERSION_X100 < 900
                type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8s_C1MR :
+#endif
                type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_16u_C1MR :
                type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_32f_C1MR :
                0) : 0;
@ -3189,19 +3228,25 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra
            ippiMaskNormDiffFuncC3 ippFuncC3 =
                normType == NORM_INF ?
                (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8u_C3CMR :
+#if IPP_VERSION_X100 < 900
                type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8s_C3CMR :
+#endif
                type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_16u_C3CMR :
                type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_32f_C3CMR :
                0) :
                normType == NORM_L1 ?
                (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8u_C3CMR :
+#if IPP_VERSION_X100 < 900
                type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8s_C3CMR :
+#endif
                type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_16u_C3CMR :
                type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_32f_C3CMR :
                0) :
                normType == NORM_L2 || normType == NORM_L2SQR ?
                (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8u_C3CMR :
+#if IPP_VERSION_X100 < 900
                type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8s_C3CMR :
+#endif
                type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_16u_C3CMR :
                type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_32f_C3CMR :
                0) : 0;
@ -3247,7 +3292,7 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra
                type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C3R :
                type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C4R :
                type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C1R :
-#if (IPP_VERSION_X100 >= 801)
+#if (IPP_VERSION_X100 >= 810)
                type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
                type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
 #endif
@ -3262,7 +3307,7 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra
                type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C1R :
                type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C3R :
                type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C4R :
-#if !(IPP_VERSION_X100 == 802 && (!defined(IPP_VERSION_UPDATE) || IPP_VERSION_UPDATE <= 1)) // Oct 2014: Accuracy issue with IPP 8.2 / 8.2.1
+#if !(IPP_VERSION_X100 == 820 || IPP_VERSION_X100 == 821) // Oct 2014: Accuracy issue with IPP 8.2 / 8.2.1
                type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C1R :
 #endif
                type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C3R :
@ -3326,7 +3371,7 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
                _result)
 #endif

-    CV_IPP_RUN(IPP_VERSION_MAJOR >= 7, ipp_norm(_src1, _src2, normType, _mask, _result), _result);
+    CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_norm(_src1, _src2, normType, _mask, _result), _result);

    if( normType & CV_RELATIVE )
    {
--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@ -42,6 +42,7 @@
 //M*/

 #include "precomp.hpp"
+#include <iostream>

 namespace cv {

@ -377,21 +378,6 @@ bool checkHardwareSupport(int feature)


 volatile bool useOptimizedFlag = true;
-#ifdef HAVE_IPP
-struct IPPInitializer
-{
-    IPPInitializer(void)
-    {
-#if IPP_VERSION_MAJOR >= 8
-        ippInit();
-#else
-        ippStaticInit();
-#endif
-    }
-};
-
-IPPInitializer ippInitializer;
-#endif

 volatile bool USE_SSE2 = featuresEnabled.have[CV_CPU_SSE2];
 volatile bool USE_SSE4_2 = featuresEnabled.have[CV_CPU_SSE4_2];
@ -1034,7 +1020,7 @@ class TlsStorage
 public:
    TlsStorage()
    {
-        tlsSlots = 0;
+        tlsSlots.reserve(32);
        threads.reserve(32);
    }
    ~TlsStorage()
@ -1077,15 +1063,27 @@ public:
    size_t reserveSlot()
    {
        AutoLock guard(mtxGlobalAccess);
-        tlsSlots++;
-        return (tlsSlots-1);
+
+        // Find unused slots
+        for(size_t slot = 0; slot < tlsSlots.size(); slot++)
+        {
+            if(!tlsSlots[slot])
+            {
+                tlsSlots[slot] = 1;
+                return slot;
+            }
+        }
+
+        // Create new slot
+        tlsSlots.push_back(1);
+        return (tlsSlots.size()-1);
    }

    // Release TLS storage index and pass associated data to caller
    void releaseSlot(size_t slotIdx, std::vector<void*> &dataVec)
    {
        AutoLock guard(mtxGlobalAccess);
-        CV_Assert(tlsSlots > slotIdx);
+        CV_Assert(tlsSlots.size() > slotIdx);

        for(size_t i = 0; i < threads.size(); i++)
        {
@ -1096,15 +1094,14 @@ public:
                threads[i]->slots[slotIdx] = 0;
            }
        }
-        // If we removing last element, decriment slots size to save space
-        if(tlsSlots-1 == slotIdx)
-            tlsSlots--;
+
+        tlsSlots[slotIdx] = 0;
    }

    // Get data by TLS storage index
    void* getData(size_t slotIdx) const
    {
-        CV_Assert(tlsSlots > slotIdx);
+        CV_Assert(tlsSlots.size() > slotIdx);

        ThreadData* threadData = (ThreadData*)tls.GetData();
        if(threadData && threadData->slots.size() > slotIdx)
@ -1113,10 +1110,24 @@ public:
        return NULL;
    }

+    // Collect the non-null values stored by the registered threads at the given TLS slot index.
+    // Matching pointers are appended to dataVec; the per-thread slots themselves are not modified.
+    // The global access mutex is held for the whole scan.
+    void gather(size_t slotIdx, std::vector<void*> &dataVec)
+    {
+        AutoLock guard(mtxGlobalAccess);
+        CV_Assert(tlsSlots.size() > slotIdx);
+
+        const size_t threadCount = threads.size();
+        for(size_t t = 0; t < threadCount; t++)
+        {
+            std::vector<void*>& slotsOfThread = threads[t]->slots;
+
+            // This thread never grew its slot table up to slotIdx: nothing stored there.
+            if (slotIdx >= slotsOfThread.size())
+                continue;
+
+            void* value = slotsOfThread[slotIdx];
+            if (value)
+                dataVec.push_back(value);
+        }
+    }
+
    // Set data to storage index
    void setData(size_t slotIdx, void* pData)
    {
-        CV_Assert(pData != NULL);
+        CV_Assert(tlsSlots.size() > slotIdx && pData != NULL);

        ThreadData* threadData = (ThreadData*)tls.GetData();
        if(!threadData)
@ -1131,7 +1142,11 @@ public:
        }

        if(slotIdx >= threadData->slots.size())
-            threadData->slots.resize(slotIdx+1);
+        {
+            AutoLock guard(mtxGlobalAccess);
+            while(slotIdx >= threadData->slots.size())
+                threadData->slots.push_back(NULL);
+        }
        threadData->slots[slotIdx] = pData;
    }

@ -1139,7 +1154,7 @@ private:
    TlsAbstraction tls; // TLS abstraction layer instance

    Mutex  mtxGlobalAccess;           // Shared objects operation guard
-    size_t tlsSlots;                  // TLS storage counter
+    std::vector<int> tlsSlots;        // TLS keys state
    std::vector<ThreadData*> threads; // Array for all allocated data. Thread data pointers are placed here to allow data cleanup
 };

@ -1159,6 +1174,11 @@ TLSDataContainer::~TLSDataContainer()
    CV_Assert(key_ == -1); // Key must be released in child object
 }

+// Appends to `data` whatever the TLS storage's gather() collects for this container's key_.
+// Existing elements of `data` are kept; gather() only pushes back.
+void TLSDataContainer::gatherData(std::vector<void*> &data) const
+{
+    getTlsStorage().gather(key_, data);
+}
+
 void TLSDataContainer::release()
 {
    std::vector<void*> data;
@ -1271,26 +1291,93 @@ void setUseCollection(bool flag)
 namespace ipp
 {

-static int ippStatus = 0; // 0 - all is ok, -1 - IPP functions failed
-static const char * funcname = NULL, * filename = NULL;
-static int linen = 0;
+// Holds the IPP-related state used by the functions in this namespace: the default
+// "use IPP" switch, the last status reported through setIppStatus() together with its
+// source location, and the feature mask handed to IPP_INITIALIZER.
+// When HAVE_IPP is defined, the constructor reads the OPENCV_IPP environment variable.
+struct IPPInitSingelton
+{
+public:
+    IPPInitSingelton()
+    {
+        // Defaults: IPP enabled, no error recorded, no feature restriction.
+        useIPP      = true;
+        ippStatus   = 0;
+        funcname    = NULL;
+        filename    = NULL;
+        linen       = 0;
+        ippFeatures = 0;
+
+#ifdef HAVE_IPP
+        // NOTE(review): getenv() returns NULL when the variable is unset; this relies on
+        // cv::String accepting a null pointer and yielding an empty string - confirm.
+        const char* pIppEnv = getenv("OPENCV_IPP");
+        cv::String env = pIppEnv;
+        if(env.size())
+        {
+            // "disabled" turns IPP off by default and prints a warning to stderr.
+            if(env == "disabled")
+            {
+                std::cerr << "WARNING: IPP was disabled by OPENCV_IPP environment variable" << std::endl;
+                useIPP = false;
+            }
+#if IPP_VERSION_X100 >= 900
+            // These instruction-set names are only recognized when IPP_VERSION_X100 >= 900;
+            // each one selects the corresponding ippCPUID_* value as the feature mask.
+            else if(env == "sse")
+                ippFeatures = ippCPUID_SSE;
+            else if(env == "sse2")
+                ippFeatures = ippCPUID_SSE2;
+            else if(env == "sse3")
+                ippFeatures = ippCPUID_SSE3;
+            else if(env == "ssse3")
+                ippFeatures = ippCPUID_SSSE3;
+            else if(env == "sse41")
+                ippFeatures = ippCPUID_SSE41;
+            else if(env == "sse42")
+                ippFeatures = ippCPUID_SSE42;
+            else if(env == "avx")
+                ippFeatures = ippCPUID_AVX;
+            else if(env == "avx2")
+                ippFeatures = ippCPUID_AVX2;
+#endif
+            // Any other non-empty value is reported on stderr and otherwise ignored.
+            else
+                std::cerr << "ERROR: Improper value of OPENCV_IPP: " << env.c_str() << std::endl;
+        }
+
+        // Invoked even when useIPP was set to false above.
+        IPP_INITIALIZER(ippFeatures)
+#endif
+    }
+
+    bool useIPP;           // default value copied into the per-thread useIPP flag by useIPP()
+
+    int         ippStatus; // 0 - all is ok, -1 - IPP functions failed
+    const char *funcname;  // function name passed to the last setIppStatus() call (may be NULL)
+    const char *filename;  // file name passed to the last setIppStatus() call (may be NULL)
+    int         linen;     // line number passed to the last setIppStatus() call
+    int         ippFeatures; // feature mask chosen from OPENCV_IPP; 0 when none was selected
+};
+
+// Returns a reference to the IPPInitSingelton instance produced by CV_SINGLETON_LAZY_INIT_REF.
+// NOTE(review): the macro is defined elsewhere; presumably it constructs the object on the
+// first call and returns the same instance afterwards - confirm against its definition.
+static IPPInitSingelton& getIPPSingelton()
+{
+    CV_SINGLETON_LAZY_INIT_REF(IPPInitSingelton, new IPPInitSingelton())
+}
+
+// Returns the ippFeatures value stored in the IPP singleton.
+// When built without HAVE_IPP the singleton is not consulted and 0 is returned.
+int getIppFeatures()
+{
+#ifdef HAVE_IPP
+    return getIPPSingelton().ippFeatures;
+#else
+    return 0;
+#endif
+}

 void setIppStatus(int status, const char * const _funcname, const char * const _filename, int _line)
 {
-    ippStatus = status;
-    funcname = _funcname;
-    filename = _filename;
-    linen = _line;
+    getIPPSingelton().ippStatus = status;
+    getIPPSingelton().funcname = _funcname;
+    getIPPSingelton().filename = _filename;
+    getIPPSingelton().linen = _line;
 }

 int getIppStatus()
 {
-    return ippStatus;
+    return getIPPSingelton().ippStatus;
 }

 String getIppErrorLocation()
 {
-    return format("%s:%d %s", filename ? filename : "", linen, funcname ? funcname : "");
+    return format("%s:%d %s", getIPPSingelton().filename ? getIPPSingelton().filename : "", getIPPSingelton().linen, getIPPSingelton().funcname ? getIPPSingelton().funcname : "");
 }

 bool useIPP()
@ -1299,11 +1386,7 @@ bool useIPP()
    CoreTLSData* data = getCoreTlsData().get();
    if(data->useIPP < 0)
    {
-        const char* pIppEnv = getenv("OPENCV_IPP");
-        if(pIppEnv && (cv::String(pIppEnv) == "disabled"))
-            data->useIPP = false;
-        else
-            data->useIPP = true;
+        data->useIPP = getIPPSingelton().useIPP;
    }
    return (data->useIPP > 0);
 #else
--- a/modules/core/src/umatrix.cpp
+++ b/modules/core/src/umatrix.cpp
@ -60,25 +60,71 @@ static Mutex umatLocks[UMAT_NLOCKS];
 UMatData::UMatData(const MatAllocator* allocator)
 {
    prevAllocator = currAllocator = allocator;
-    urefcount = refcount = 0;
+    urefcount = refcount = mapcount = 0;
    data = origdata = 0;
    size = 0;
    flags = 0;
    handle = 0;
    userdata = 0;
    allocatorFlags_ = 0;
+    originalUMatData = NULL;
 }

+// Destructor: resets the descriptor fields and, if this object was linked to another
+// UMatData through originalUMatData (set by Mat::getUMat), drops the refcount/urefcount
+// references taken on it, unmapping and/or deallocating it when its counters reach zero.
 UMatData::~UMatData()
 {
    prevAllocator = currAllocator = 0;
    urefcount = refcount = 0;
+    // The object must not be destroyed while it still counts as mapped.
+    CV_Assert(mapcount == 0);
    data = origdata = 0;
    size = 0;
    flags = 0;
    handle = 0;
    userdata = 0;
    allocatorFlags_ = 0;
+    if (originalUMatData)
+    {
+        UMatData* u = originalUMatData;
+        // Undo the two increments performed when originalUMatData was assigned in Mat::getUMat.
+        CV_XADD(&(u->urefcount), -1);
+        CV_XADD(&(u->refcount), -1);
+        bool showWarn = false;
+        if (u->refcount == 0)
+        {
+            // No Mat-side references remain but UMat-side ones do: flag the lifetime problem.
+            if (u->urefcount > 0)
+                showWarn = true;
+            // simulate Mat::deallocate
+            if (u->mapcount != 0)
+            {
+                (u->currAllocator ? u->currAllocator : /* TODO allocator ? allocator :*/ Mat::getStdAllocator())->unmap(u);
+            }
+            else
+            {
+                // we don't do "map", so we can't do "unmap"
+            }
+        }
+        if (u->refcount == 0 && u->urefcount == 0) // oops, we need to free resources
+        {
+            showWarn = true;
+            // simulate UMat::deallocate
+            // NOTE(review): unlike the unmap path above, currAllocator is dereferenced here
+            // without a NULL fallback - confirm it can never be NULL at this point.
+            u->currAllocator->deallocate(u);
+        }
+#ifndef NDEBUG
+        if (showWarn)
+        {
+            // Print the warning at most 100 times per process.
+            static int warn_message_showed = 0;
+            if (warn_message_showed++ < 100)
+            {
+                fflush(stdout);
+                fprintf(stderr, "\n! OPENCV warning: getUMat()/getMat() call chain possible problem."
+                                "\n!                 Base object is dead, while nested/derived object is still alive or processed."
+                                "\n!                 Please check lifetime of UMat/Mat objects!\n");
+                fflush(stderr);
+            }
+        }
+#else
+        // Silence the unused-variable warning in builds where NDEBUG is defined.
+        (void)showWarn;
+#endif
+        originalUMatData = NULL;
+    }
 }

 void UMatData::lock()
@ -221,19 +267,34 @@ UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
    UMat hdr;
    if(!data)
        return hdr;
+    Size wholeSize;
+    Point ofs;
+    locateROI(wholeSize, ofs);
+    Size sz(cols, rows);
+    if (ofs.x != 0 || ofs.y != 0)
+    {
+        Mat src = *this;
+        int dtop = ofs.y;
+        int dbottom = wholeSize.height - src.rows - ofs.y;
+        int dleft = ofs.x;
+        int dright = wholeSize.width - src.cols - ofs.x;
+        src.adjustROI(dtop, dbottom, dleft, dright);
+        return src.getUMat(accessFlags, usageFlags)(cv::Rect(ofs.x, ofs.y, sz.width, sz.height));
+    }
+    CV_Assert(data == datastart);
+
    accessFlags |= ACCESS_RW;
-    UMatData* temp_u = u;
-    if(!temp_u)
+    UMatData* new_u = NULL;
    {
        MatAllocator *a = allocator, *a0 = getStdAllocator();
        if(!a)
            a = a0;
-        temp_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags, usageFlags);
+        new_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags, usageFlags);
    }
    bool allocated = false;
    try
    {
-        allocated = UMat::getStdAllocator()->allocate(temp_u, accessFlags, usageFlags);
+        allocated = UMat::getStdAllocator()->allocate(new_u, accessFlags, usageFlags);
    }
    catch (const cv::Exception& e)
    {
@ -241,14 +302,26 @@ UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
    }
    if (!allocated)
    {
-        allocated = getStdAllocator()->allocate(temp_u, accessFlags, usageFlags);
+        allocated = getStdAllocator()->allocate(new_u, accessFlags, usageFlags);
        CV_Assert(allocated);
    }
+    if (u != NULL)
+    {
+#ifdef HAVE_OPENCL
+        if (ocl::useOpenCL() && new_u->currAllocator == ocl::getOpenCLAllocator())
+        {
+            CV_Assert(new_u->tempUMat());
+        }
+#endif
+        new_u->originalUMatData = u;
+        CV_XADD(&(u->refcount), 1);
+        CV_XADD(&(u->urefcount), 1);
+    }
    hdr.flags = flags;
    setSize(hdr, dims, size.p, step.p);
    finalizeHdr(hdr);
-    hdr.u = temp_u;
-    hdr.offset = data - datastart;
+    hdr.u = new_u;
+    hdr.offset = 0; //data - datastart;
    hdr.addref();
    return hdr;
 }
@ -639,16 +712,25 @@ Mat UMat::getMat(int accessFlags) const
        return Mat();
    // TODO Support ACCESS_READ (ACCESS_WRITE) without unnecessary data transfers
    accessFlags |= ACCESS_RW;
-    u->currAllocator->map(u, accessFlags);
-    CV_Assert(u->data != 0);
-    Mat hdr(dims, size.p, type(), u->data + offset, step.p);
-    hdr.flags = flags;
-    hdr.u = u;
-    hdr.datastart = u->data;
-    hdr.data = u->data + offset;
-    hdr.datalimit = hdr.dataend = u->data + u->size;
-    CV_XADD(&hdr.u->refcount, 1);
-    return hdr;
+    UMatDataAutoLock autolock(u);
+    if(CV_XADD(&u->refcount, 1) == 0)
+        u->currAllocator->map(u, accessFlags);
+    if (u->data != 0)
+    {
+        Mat hdr(dims, size.p, type(), u->data + offset, step.p);
+        hdr.flags = flags;
+        hdr.u = u;
+        hdr.datastart = u->data;
+        hdr.data = u->data + offset;
+        hdr.datalimit = hdr.dataend = u->data + u->size;
+        return hdr;
+    }
+    else
+    {
+        CV_XADD(&u->refcount, -1);
+        CV_Assert(u->data != 0 && "Error mapping of UMat to host memory.");
+        return Mat();
+    }
 }

 void* UMat::handle(int accessFlags) const
@ -656,10 +738,10 @@ void* UMat::handle(int accessFlags) const
    if( !u )
        return 0;

-    // check flags: if CPU copy is newer, copy it back to GPU.
-    if( u->deviceCopyObsolete() )
+    CV_Assert(u->refcount == 0);
+    CV_Assert(!u->deviceCopyObsolete() || u->copyOnMap());
+    if (u->deviceCopyObsolete())
    {
-        CV_Assert(u->refcount == 0 || u->origdata);
        u->currAllocator->unmap(u);
    }

@ -758,7 +840,7 @@ void UMat::copyTo(OutputArray _dst, InputArray _mask) const
                   haveDstUninit ? ocl::KernelArg::WriteOnly(dst) :
                                   ocl::KernelArg::ReadWrite(dst));

-            size_t globalsize[2] = { cols, rows };
+            size_t globalsize[2] = { (size_t)cols, (size_t)rows };
            if (k.run(2, globalsize, NULL, false))
            {
                CV_IMPL_ADD(CV_IMPL_OCL);
@ -819,7 +901,7 @@ void UMat::convertTo(OutputArray _dst, int _type, double alpha, double beta) con
            else
                k.args(srcarg, dstarg, alpha, beta, rowsPerWI);

-            size_t globalsize[2] = { dst.cols * cn, (dst.rows + rowsPerWI - 1) / rowsPerWI };
+            size_t globalsize[2] = { (size_t)dst.cols * cn, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI };
            if (k.run(2, globalsize, NULL, false))
            {
                CV_IMPL_ADD(CV_IMPL_OCL);
@ -875,7 +957,7 @@ UMat& UMat::setTo(InputArray _value, InputArray _mask)
                setK.args(dstarg, scalararg);
            }

-            size_t globalsize[] = { cols * cn / kercn, (rows + rowsPerWI - 1) / rowsPerWI };
+            size_t globalsize[] = { (size_t)cols * cn / kercn, ((size_t)rows + rowsPerWI - 1) / rowsPerWI };
            if( setK.run(2, globalsize, NULL, false) )
            {
                CV_IMPL_ADD(CV_IMPL_OCL);
--- a/modules/core/src/va_intel.cpp
+++ b/modules/core/src/va_intel.cpp
@ -0,0 +1,528 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2015, Itseez, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#include "precomp.hpp"
+
+#ifdef HAVE_VA
+#  include <va/va.h>
+#else  // HAVE_VA
+#  define NO_VA_SUPPORT_ERROR CV_ErrorNoReturn(cv::Error::StsBadFunc, "OpenCV was build without VA support (libva)")
+#endif // HAVE_VA
+
+using namespace cv;
+
+////////////////////////////////////////////////////////////////////////
+// CL-VA Interoperability
+
+#ifdef HAVE_OPENCL
+#  include "opencv2/core/opencl/runtime/opencl_core.hpp"
+#  include "opencv2/core.hpp"
+#  include "opencv2/core/ocl.hpp"
+#  include "opencl_kernels_core.hpp"
+#endif // HAVE_OPENCL
+
+#if defined(HAVE_VA_INTEL) && defined(HAVE_OPENCL)
+#  include <CL/va_ext.h>
+#endif // HAVE_VA_INTEL && HAVE_OPENCL
+
+namespace cv { namespace va_intel {
+
+#if defined(HAVE_VA_INTEL) && defined(HAVE_OPENCL)
+
+static clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn clGetDeviceIDsFromVA_APIMediaAdapterINTEL = NULL;
+static clCreateFromVA_APIMediaSurfaceINTEL_fn       clCreateFromVA_APIMediaSurfaceINTEL       = NULL;
+static clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn  clEnqueueAcquireVA_APIMediaSurfacesINTEL  = NULL;
+static clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn  clEnqueueReleaseVA_APIMediaSurfacesINTEL  = NULL;
+
+static bool contextInitialized = false;
+
+#endif // HAVE_VA_INTEL && HAVE_OPENCL
+
+namespace ocl {
+
+// Returns the OpenCL Context to be used together with the given VA display.
+//
+// - Built without HAVE_VA: raises an error through NO_VA_SUPPORT_ERROR.
+// - Built with HAVE_VA_INTEL and HAVE_OPENCL, and tryInterop is true: walks the OpenCL
+//   platforms and picks the first one for which the CL-VA extension entry points resolve,
+//   a device is reported for `display`, and a sharing context can be created. The default
+//   Context is then initialized from these handles and contextInitialized is set to true.
+// - Otherwise (interop not compiled in, not requested, or no suitable platform found):
+//   returns Context::getDefault(true).
+//
+// display    - VA display handle forwarded to the CL-VA extension calls.
+// tryInterop - when false, the CL-VA interop search is skipped entirely.
+Context& initializeContextFromVA(VADisplay display, bool tryInterop)
+{
+    // Parameters are unused in some build configurations; avoid unused-parameter warnings.
+    (void)display; (void)tryInterop;
+#if !defined(HAVE_VA)
+    NO_VA_SUPPORT_ERROR;
+#else  // !HAVE_VA
+# if (defined(HAVE_VA_INTEL) && defined(HAVE_OPENCL))
+    // Reset the flag on every call; it is set again only if interop setup succeeds below.
+    contextInitialized = false;
+    if (tryInterop)
+    {
+        // Standard two-step query: first the number of platforms, then the platform IDs.
+        cl_uint numPlatforms;
+        cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
+        if (status != CL_SUCCESS)
+            CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get number of platforms");
+        if (numPlatforms == 0)
+            CV_Error(cv::Error::OpenCLInitError, "OpenCL: No available platforms");
+
+        std::vector<cl_platform_id> platforms(numPlatforms);
+        status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL);
+        if (status != CL_SUCCESS)
+            CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get platform Id list");
+
+        // For CL-VA interop, we must find platform/device with "cl_intel_va_api_media_sharing" extension.
+        // With standard initialization procedure, we should examine platform extension string for that.
+        // But in practice, the platform ext string doesn't contain it, while device ext string does.
+        // Following the Intel procedure (see tutorial), we obtain device IDs by an extension call.
+        // Note that we must obtain function pointers using specific platform ID, and can't provide pointers in advance.
+        // So, we iterate and select the first platform, for which we got non-NULL pointers, device, and CL context.
+
+        int found = -1;
+        cl_context context = 0;
+        cl_device_id device = 0;
+
+        for (int i = 0; i < (int)numPlatforms; ++i)
+        {
+            // Get extension function pointers
+            // (stored in the file-level statics, so they are overwritten for every platform tried)
+
+            clGetDeviceIDsFromVA_APIMediaAdapterINTEL = (clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)
+                clGetExtensionFunctionAddressForPlatform(platforms[i], "clGetDeviceIDsFromVA_APIMediaAdapterINTEL");
+            clCreateFromVA_APIMediaSurfaceINTEL       = (clCreateFromVA_APIMediaSurfaceINTEL_fn)
+                clGetExtensionFunctionAddressForPlatform(platforms[i], "clCreateFromVA_APIMediaSurfaceINTEL");
+            clEnqueueAcquireVA_APIMediaSurfacesINTEL  = (clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)
+                clGetExtensionFunctionAddressForPlatform(platforms[i], "clEnqueueAcquireVA_APIMediaSurfacesINTEL");
+            clEnqueueReleaseVA_APIMediaSurfacesINTEL  = (clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)
+                clGetExtensionFunctionAddressForPlatform(platforms[i], "clEnqueueReleaseVA_APIMediaSurfacesINTEL");
+
+            // All four entry points are required; otherwise try the next platform.
+            if (((void*)clGetDeviceIDsFromVA_APIMediaAdapterINTEL == NULL) ||
+                ((void*)clCreateFromVA_APIMediaSurfaceINTEL == NULL) ||
+                ((void*)clEnqueueAcquireVA_APIMediaSurfacesINTEL == NULL) ||
+                ((void*)clEnqueueReleaseVA_APIMediaSurfacesINTEL == NULL))
+            {
+                continue;
+            }
+
+            // Query device list
+
+            cl_uint numDevices = 0;
+
+            // First call only counts the devices; the second call fetches a single device ID.
+            status = clGetDeviceIDsFromVA_APIMediaAdapterINTEL(platforms[i], CL_VA_API_DISPLAY_INTEL, display,
+                                                               CL_PREFERRED_DEVICES_FOR_VA_API_INTEL, 0, NULL, &numDevices);
+            if ((status != CL_SUCCESS) || !(numDevices > 0))
+                continue;
+            numDevices = 1; // initializeContextFromHandle() expects only 1 device
+            status = clGetDeviceIDsFromVA_APIMediaAdapterINTEL(platforms[i], CL_VA_API_DISPLAY_INTEL, display,
+                                                               CL_PREFERRED_DEVICES_FOR_VA_API_INTEL, numDevices, &device, NULL);
+            if (status != CL_SUCCESS)
+                continue;
+
+            // Creating CL-VA media sharing OpenCL context
+
+            cl_context_properties props[] = {
+                CL_CONTEXT_VA_API_DISPLAY_INTEL, (cl_context_properties) display,
+                CL_CONTEXT_INTEROP_USER_SYNC, CL_FALSE, // no explicit sync required
+                0
+            };
+
+            context = clCreateContext(props, numDevices, &device, NULL, NULL, &status);
+            if (status != CL_SUCCESS)
+            {
+                // Context creation failed on this platform: release the device and keep searching.
+                clReleaseDevice(device);
+            }
+            else
+            {
+                found = i;
+                break;
+            }
+        }
+
+        if (found >= 0)
+        {
+            // Hand the platform/context/device handles over to the default Context object.
+            contextInitialized = true;
+            Context& ctx = Context::getDefault(false);
+            initializeContextFromHandle(ctx, platforms[found], context, device);
+            return ctx;
+        }
+    }
+# endif // HAVE_VA_INTEL && HAVE_OPENCL
+    // Fallback path: no CL-VA interop; use the regular default context.
+    {
+        Context& ctx = Context::getDefault(true);
+        return ctx;
+    }
+#endif  // !HAVE_VA
+}
+
+#if defined(HAVE_VA_INTEL) && defined(HAVE_OPENCL)
+// Launches the "YUV2BGR_NV12_8u" kernel from the cvtclr_dx program over a cols x rows
+// global range. clImageY / clImageUV are the source objects and clBuffer the destination
+// with row stride `step`. Returns false when the kernel cannot be created, otherwise
+// the result of Kernel::run().
+static bool ocl_convert_nv12_to_bgr(cl_mem clImageY, cl_mem clImageUV, cl_mem clBuffer, int step, int cols, int rows)
+{
+    ocl::Kernel kernel;
+    kernel.create("YUV2BGR_NV12_8u", cv::ocl::core::cvtclr_dx_oclsrc, "");
+
+    if (!kernel.empty())
+    {
+        kernel.args(clImageY, clImageUV, clBuffer, step, cols, rows);
+
+        // Two-dimensional global work size: x spans the columns, y spans the rows.
+        size_t workSize[2];
+        workSize[0] = (size_t)cols;
+        workSize[1] = (size_t)rows;
+        return kernel.run(2, workSize, 0, false);
+    }
+
+    return false;
+}
+
+// Launches the "BGR2YUV_NV12_8u" kernel from the cvtclr_dx program over a cols x rows
+// global range. clBuffer (row stride `step`) is the source; clImageY / clImageUV are the
+// destination objects. Returns false when the kernel cannot be created, otherwise
+// the result of Kernel::run().
+static bool ocl_convert_bgr_to_nv12(cl_mem clBuffer, int step, int cols, int rows, cl_mem clImageY, cl_mem clImageUV)
+{
+    ocl::Kernel kernel;
+    kernel.create("BGR2YUV_NV12_8u", cv::ocl::core::cvtclr_dx_oclsrc, "");
+
+    if (!kernel.empty())
+    {
+        kernel.args(clBuffer, step, cols, rows, clImageY, clImageUV);
+
+        // Two-dimensional global work size: x spans the columns, y spans the rows.
+        size_t workSize[2];
+        workSize[0] = (size_t)cols;
+        workSize[1] = (size_t)rows;
+        return kernel.run(2, workSize, 0, false);
+    }
+
+    return false;
+}
+#endif // HAVE_VA_INTEL && HAVE_OPENCL
+
+} // namespace cv::va_intel::ocl
+
+#if defined(HAVE_VA)
+const int NCHANNELS = 3;
+
+// Writes one BGR pixel at dst: the scaled luma value Y plus the blue/green/red
+// chroma terms, each saturated to the unsigned 8-bit range.
+static inline void nv12_store_bgr(unsigned char* dst, float Y, float buv, float guv, float ruv)
+{
+    dst[0] = saturate_cast<unsigned char>(Y + buv);
+    dst[1] = saturate_cast<unsigned char>(Y + guv);
+    dst[2] = saturate_cast<unsigned char>(Y + ruv);
+}
+
+// Converts NV12 data (a Y plane followed by an interleaved UV plane, located through
+// image.offsets[] / image.pitches[] inside `buffer`) into the BGR matrix `bgr`.
+//
+// image  - describes plane offsets and row pitches of the NV12 data.
+// buffer - start of the mapped image memory.
+// bgr    - destination; its rows/cols define how many pixels are converted.
+//          Assumed to be an already allocated 8-bit, 3-channel Mat - TODO confirm against callers.
+//
+// Pixels are processed in 2x2 blocks that share one (U, V) pair. When bgr.rows or bgr.cols
+// is odd, the last block is only partially inside the image: the missing row/column is
+// skipped so nothing is read or written outside the matrix. For even sizes the output is
+// identical to processing full 2x2 blocks.
+static void copy_convert_nv12_to_bgr(const VAImage& image, const unsigned char* buffer, Mat& bgr)
+{
+    const float d1 = 16.0f;
+    const float d2 = 128.0f;
+
+    static const float coeffs[5] =
+        {
+            1.163999557f,
+            2.017999649f,
+            -0.390999794f,
+            -0.812999725f,
+            1.5959997177f
+        };
+
+    const size_t srcOffsetY = image.offsets[0];
+    const size_t srcOffsetUV = image.offsets[1];
+
+    const size_t srcStepY = image.pitches[0];
+    const size_t srcStepUV = image.pitches[1];
+
+    const size_t dstStep = bgr.step;
+
+    const unsigned char* const srcYBase = buffer + srcOffsetY;
+    const unsigned char* const srcUVBase = buffer + srcOffsetUV;
+
+    unsigned char* const dstBase = bgr.data;
+
+    for (int y = 0; y < bgr.rows; y += 2)
+    {
+        // The second row of the 2x2 block exists only if it is still inside the image.
+        const bool hasRow1 = (y + 1 < bgr.rows);
+
+        // Row pointers are derived from the row index so that no pointer is ever
+        // advanced past the last row; row-1 pointers alias row 0 when unused.
+        const unsigned char* srcY0 = srcYBase + (size_t)y * srcStepY;
+        const unsigned char* srcY1 = hasRow1 ? srcY0 + srcStepY : srcY0;
+        const unsigned char* srcUV = srcUVBase + (size_t)(y / 2) * srcStepUV;
+
+        unsigned char* dst0 = dstBase + (size_t)y * dstStep;
+        unsigned char* dst1 = hasRow1 ? dst0 + dstStep : dst0;
+
+        for (int x = 0; x < bgr.cols; x += 2)
+        {
+            // Likewise for the second column of the block.
+            const bool hasCol1 = (x + 1 < bgr.cols);
+
+            // One interleaved (U, V) pair per 2x2 block, centered around zero.
+            const float U = float(srcUV[2*(x/2)+0]) - d2;
+            const float V = float(srcUV[2*(x/2)+1]) - d2;
+
+            const float ruv = coeffs[4]*V;
+            const float guv = coeffs[3]*V + coeffs[2]*U;
+            const float buv = coeffs[1]*U;
+
+            const float Y0 = std::max(0.0f, float(srcY0[x+0]) - d1) * coeffs[0];
+            nv12_store_bgr(dst0 + (x+0)*NCHANNELS, Y0, buv, guv, ruv);
+
+            if (hasCol1)
+            {
+                const float Y1 = std::max(0.0f, float(srcY0[x+1]) - d1) * coeffs[0];
+                nv12_store_bgr(dst0 + (x+1)*NCHANNELS, Y1, buv, guv, ruv);
+            }
+
+            if (hasRow1)
+            {
+                const float Y2 = std::max(0.0f, float(srcY1[x+0]) - d1) * coeffs[0];
+                nv12_store_bgr(dst1 + (x+0)*NCHANNELS, Y2, buv, guv, ruv);
+
+                if (hasCol1)
+                {
+                    const float Y3 = std::max(0.0f, float(srcY1[x+1]) - d1) * coeffs[0];
+                    nv12_store_bgr(dst1 + (x+1)*NCHANNELS, Y3, buv, guv, ruv);
+                }
+            }
+        }
+    }
+}
+
+// Converts the 8-bit, 3-channel matrix `bgr` into NV12 and writes it into a mapped VA buffer.
+//   image  - supplies the plane offsets (offsets[0] = Y, offsets[1] = interleaved UV) and row pitches
+//   bgr    - source pixels, read in B, G, R channel order
+//   buffer - start of the mapped image data to fill
+// Pixels are processed in 2x2 blocks. Column x+1 and row y+1 are accessed unconditionally,
+// so the code assumes bgr.cols and bgr.rows are even -- TODO confirm callers guarantee this.
+static void copy_convert_bgr_to_nv12(const VAImage& image, const Mat& bgr, unsigned char* buffer)
+{
+    // Offsets added to luma (d1) and chroma (d2) after the weighted sums.
+    const float d1 = 16.0f;
+    const float d2 = 128.0f;
+
+    // coeffs[0..2]: R, G, B weights for Y; coeffs[3..5]: R, G, B weights for U;
+    // coeffs[5..7]: R, G, B weights for V (coeffs[5] is used by both U and V).
+    static const float coeffs[8] =
+        {
+            0.256999969f,  0.50399971f,   0.09799957f,   -0.1479988098f,
+            -0.2909994125f, 0.438999176f, -0.3679990768f, -0.0709991455f
+        };
+
+    const size_t dstOffsetY = image.offsets[0];
+    const size_t dstOffsetUV = image.offsets[1];
+
+    const size_t dstStepY = image.pitches[0];
+    const size_t dstStepUV = image.pitches[1];
+
+    const size_t srcStep = bgr.step;
+
+    const unsigned char* src0 = bgr.data;
+
+    unsigned char* dstY0 = buffer + dstOffsetY;
+    unsigned char* dstUV = buffer + dstOffsetUV;
+
+    // Each iteration consumes two source rows (src0/src1) and produces two Y rows and one UV row.
+    for (int y = 0; y < bgr.rows; y += 2)
+    {
+        const unsigned char *src1 = src0 + srcStep;
+        unsigned char* dstY1 = dstY0 + dstStepY;
+
+        for (int x = 0; x < bgr.cols; x += 2)
+        {
+            float B0 = float(src0[(x+0)*NCHANNELS+0]);
+            float G0 = float(src0[(x+0)*NCHANNELS+1]);
+            float R0 = float(src0[(x+0)*NCHANNELS+2]);
+
+            float B1 = float(src0[(x+1)*NCHANNELS+0]);
+            float G1 = float(src0[(x+1)*NCHANNELS+1]);
+            float R1 = float(src0[(x+1)*NCHANNELS+2]);
+
+            float B2 = float(src1[(x+0)*NCHANNELS+0]);
+            float G2 = float(src1[(x+0)*NCHANNELS+1]);
+            float R2 = float(src1[(x+0)*NCHANNELS+2]);
+
+            float B3 = float(src1[(x+1)*NCHANNELS+0]);
+            float G3 = float(src1[(x+1)*NCHANNELS+1]);
+            float R3 = float(src1[(x+1)*NCHANNELS+2]);
+
+            float Y0 = coeffs[0]*R0 + coeffs[1]*G0 + coeffs[2]*B0 + d1;
+            float Y1 = coeffs[0]*R1 + coeffs[1]*G1 + coeffs[2]*B1 + d1;
+            float Y2 = coeffs[0]*R2 + coeffs[1]*G2 + coeffs[2]*B2 + d1;
+            float Y3 = coeffs[0]*R3 + coeffs[1]*G3 + coeffs[2]*B3 + d1;
+
+            // Chroma is taken from the top-left pixel of the 2x2 block only (no averaging).
+            float U = coeffs[3]*R0 + coeffs[4]*G0 + coeffs[5]*B0 + d2;
+            float V = coeffs[5]*R0 + coeffs[6]*G0 + coeffs[7]*B0 + d2;
+
+            dstY0[x+0] = saturate_cast<unsigned char>(Y0);
+            dstY0[x+1] = saturate_cast<unsigned char>(Y1);
+            dstY1[x+0] = saturate_cast<unsigned char>(Y2);
+            dstY1[x+1] = saturate_cast<unsigned char>(Y3);
+
+            // One interleaved U,V byte pair per 2x2 block.
+            dstUV[2*(x/2)+0] = saturate_cast<unsigned char>(U);
+            dstUV[2*(x/2)+1] = saturate_cast<unsigned char>(V);
+        }
+
+        // Advance two source rows, two Y rows and one UV row.
+        src0 = src1 + srcStep;
+        dstY0 = dstY1 + dstStepY;
+        dstUV += dstStepUV;
+    }
+}
+#endif // HAVE_VA
+
+// Writes the contents of `src` into the VA surface `surface`, converting to NV12.
+//   display - VA display used by the CPU path (vaSyncSurface / vaDeriveImage / vaMapBuffer)
+//   src     - source array; must have type CV_8UC3 and the same width/height as `size`
+//   surface - destination VA surface
+//   size    - expected frame size
+// When built with HAVE_VA_INTEL and HAVE_OPENCL and the interop context has been initialized,
+// the conversion runs through OpenCL on images created from the surface; otherwise the surface
+// is mapped and converted on the CPU. Failed checks and API calls are reported through
+// CV_Assert / CV_Error. Without HAVE_VA the function only reports NO_VA_SUPPORT_ERROR.
+void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface, Size size)
+{
+    (void)display; (void)src; (void)surface; (void)size;
+#if !defined(HAVE_VA)
+    NO_VA_SUPPORT_ERROR;
+#else  // !HAVE_VA
+    const int stype = CV_8UC3;
+
+    int srcType = src.type();
+    CV_Assert(srcType == stype);
+
+    Size srcSize = src.size();
+    CV_Assert(srcSize.width == size.width && srcSize.height == size.height);
+
+# if (defined(HAVE_VA_INTEL) && defined(HAVE_OPENCL))
+    if (contextInitialized)
+    {
+        UMat u = src.getUMat();
+
+        // TODO Add support for roi
+        CV_Assert(u.offset == 0);
+        CV_Assert(u.isContinuous());
+
+        cl_mem clBuffer = (cl_mem)u.handle(ACCESS_READ);
+
+        using namespace cv::ocl;
+        Context& ctx = Context::getDefault();
+        cl_context context = (cl_context)ctx.ptr();
+
+        cl_int status = 0;
+
+        // Plane 0 is the Y image, plane 1 the UV image of the same surface.
+        cl_mem clImageY = clCreateFromVA_APIMediaSurfaceINTEL(context, CL_MEM_WRITE_ONLY, &surface, 0, &status);
+        if (status != CL_SUCCESS)
+            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromVA_APIMediaSurfaceINTEL failed (Y plane)");
+        cl_mem clImageUV = clCreateFromVA_APIMediaSurfaceINTEL(context, CL_MEM_WRITE_ONLY, &surface, 1, &status);
+        if (status != CL_SUCCESS)
+            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromVA_APIMediaSurfaceINTEL failed (UV plane)");
+
+        cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
+
+        cl_mem images[2] = { clImageY, clImageUV };
+        status = clEnqueueAcquireVA_APIMediaSurfacesINTEL(q, 2, images, 0, NULL, NULL);
+        if (status != CL_SUCCESS)
+            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireVA_APIMediaSurfacesINTEL failed");
+        if (!ocl::ocl_convert_bgr_to_nv12(clBuffer, (int)u.step[0], u.cols, u.rows, clImageY, clImageUV))
+            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: ocl_convert_bgr_to_nv12 failed");
+        // Keep the return code so the check below tests this call rather than the stale
+        // status left over from the acquire call.
+        status = clEnqueueReleaseVA_APIMediaSurfacesINTEL(q, 2, images, 0, NULL, NULL);
+        if (status != CL_SUCCESS)
+            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseVA_APIMediaSurfacesINTEL failed");
+
+        status = clFinish(q); // TODO Use events
+        if (status != CL_SUCCESS)
+            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clFinish failed");
+
+        status = clReleaseMemObject(clImageY); // TODO RAII
+        if (status != CL_SUCCESS)
+            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clReleaseMem failed (Y plane)");
+        status = clReleaseMemObject(clImageUV);
+        if (status != CL_SUCCESS)
+            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clReleaseMem failed (UV plane)");
+    }
+    else
+# endif // HAVE_VA_INTEL && HAVE_OPENCL
+    {
+        // CPU path: map the surface's image buffer and convert in place.
+        Mat m = src.getMat();
+
+        // TODO Add support for roi
+        CV_Assert(m.data == m.datastart);
+        CV_Assert(m.isContinuous());
+
+        VAStatus status = 0;
+
+        status = vaSyncSurface(display, surface);
+        if (status != VA_STATUS_SUCCESS)
+            CV_Error(cv::Error::StsError, "VA-API: vaSyncSurface failed");
+
+        VAImage image;
+        status = vaDeriveImage(display, surface, &image);
+        if (status != VA_STATUS_SUCCESS)
+            CV_Error(cv::Error::StsError, "VA-API: vaDeriveImage failed");
+
+        unsigned char* buffer = 0;
+        status = vaMapBuffer(display, image.buf, (void **)&buffer);
+        if (status != VA_STATUS_SUCCESS)
+            CV_Error(cv::Error::StsError, "VA-API: vaMapBuffer failed");
+
+        // The CPU converter only understands NV12 layout.
+        CV_Assert(image.format.fourcc == VA_FOURCC_NV12);
+
+        copy_convert_bgr_to_nv12(image, m, buffer);
+
+        status = vaUnmapBuffer(display, image.buf);
+        if (status != VA_STATUS_SUCCESS)
+            CV_Error(cv::Error::StsError, "VA-API: vaUnmapBuffer failed");
+
+        status = vaDestroyImage(display, image.image_id);
+        if (status != VA_STATUS_SUCCESS)
+            CV_Error(cv::Error::StsError, "VA-API: vaDestroyImage failed");
+    }
+#endif  // !HAVE_VA
+}
+
+// Reads the VA surface `surface` into `dst`, converting from NV12.
+//   display - VA display used by the CPU path (vaSyncSurface / vaDeriveImage / vaMapBuffer)
+//   surface - source VA surface
+//   size    - frame size; `dst` is created with this size
+//   dst     - output array, (re)created as CV_8UC3
+// When built with HAVE_VA_INTEL and HAVE_OPENCL and the interop context has been initialized,
+// the conversion runs through OpenCL on images created from the surface; otherwise the surface
+// is mapped and converted on the CPU. Failed checks and API calls are reported through
+// CV_Assert / CV_Error. Without HAVE_VA the function only reports NO_VA_SUPPORT_ERROR.
+void convertFromVASurface(VADisplay display, VASurfaceID surface, Size size, OutputArray dst)
+{
+    (void)display; (void)surface; (void)dst; (void)size;
+#if !defined(HAVE_VA)
+    NO_VA_SUPPORT_ERROR;
+#else  // !HAVE_VA
+    const int dtype = CV_8UC3;
+
+    // TODO Need to specify ACCESS_WRITE here somehow to prevent useless data copying!
+    dst.create(size, dtype);
+
+# if (defined(HAVE_VA_INTEL) && defined(HAVE_OPENCL))
+    if (contextInitialized)
+    {
+        UMat u = dst.getUMat();
+
+        // TODO Add support for roi
+        CV_Assert(u.offset == 0);
+        CV_Assert(u.isContinuous());
+
+        cl_mem clBuffer = (cl_mem)u.handle(ACCESS_WRITE);
+
+        using namespace cv::ocl;
+        Context& ctx = Context::getDefault();
+        cl_context context = (cl_context)ctx.ptr();
+
+        cl_int status = 0;
+
+        // Plane 0 is the Y image, plane 1 the UV image of the same surface.
+        cl_mem clImageY = clCreateFromVA_APIMediaSurfaceINTEL(context, CL_MEM_READ_ONLY, &surface, 0, &status);
+        if (status != CL_SUCCESS)
+            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromVA_APIMediaSurfaceINTEL failed (Y plane)");
+        cl_mem clImageUV = clCreateFromVA_APIMediaSurfaceINTEL(context, CL_MEM_READ_ONLY, &surface, 1, &status);
+        if (status != CL_SUCCESS)
+            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromVA_APIMediaSurfaceINTEL failed (UV plane)");
+
+        cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
+
+        // Acquire both images, run the conversion kernel, then release them again.
+        cl_mem images[2] = { clImageY, clImageUV };
+        status = clEnqueueAcquireVA_APIMediaSurfacesINTEL(q, 2, images, 0, NULL, NULL);
+        if (status != CL_SUCCESS)
+            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireVA_APIMediaSurfacesINTEL failed");
+        if (!ocl::ocl_convert_nv12_to_bgr(clImageY, clImageUV, clBuffer, (int)u.step[0], u.cols, u.rows))
+            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: ocl_convert_nv12_to_bgr failed");
+        status = clEnqueueReleaseVA_APIMediaSurfacesINTEL(q, 2, images, 0, NULL, NULL);
+        if (status != CL_SUCCESS)
+            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseVA_APIMediaSurfacesINTEL failed");
+
+        status = clFinish(q); // TODO Use events
+        if (status != CL_SUCCESS)
+            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clFinish failed");
+
+        status = clReleaseMemObject(clImageY); // TODO RAII
+        if (status != CL_SUCCESS)
+            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clReleaseMem failed (Y plane)");
+        status = clReleaseMemObject(clImageUV);
+        if (status != CL_SUCCESS)
+            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clReleaseMem failed (UV plane)");
+    }
+    else
+# endif // HAVE_VA_INTEL && HAVE_OPENCL
+    {
+        // CPU path: map the surface's image buffer and convert into the matrix.
+        Mat m = dst.getMat();
+
+        // TODO Add support for roi
+        CV_Assert(m.data == m.datastart);
+        CV_Assert(m.isContinuous());
+
+        VAStatus status = 0;
+
+        status = vaSyncSurface(display, surface);
+        if (status != VA_STATUS_SUCCESS)
+            CV_Error(cv::Error::StsError, "VA-API: vaSyncSurface failed");
+
+        VAImage image;
+        status = vaDeriveImage(display, surface, &image);
+        if (status != VA_STATUS_SUCCESS)
+            CV_Error(cv::Error::StsError, "VA-API: vaDeriveImage failed");
+
+        unsigned char* buffer = 0;
+        status = vaMapBuffer(display, image.buf, (void **)&buffer);
+        if (status != VA_STATUS_SUCCESS)
+            CV_Error(cv::Error::StsError, "VA-API: vaMapBuffer failed");
+
+        // The CPU converter only understands NV12 layout.
+        // NOTE(review): if this assert fires, the buffer stays mapped and the image is not destroyed.
+        CV_Assert(image.format.fourcc == VA_FOURCC_NV12);
+
+        copy_convert_nv12_to_bgr(image, buffer, m);
+
+        status = vaUnmapBuffer(display, image.buf);
+        if (status != VA_STATUS_SUCCESS)
+            CV_Error(cv::Error::StsError, "VA-API: vaUnmapBuffer failed");
+
+        status = vaDestroyImage(display, image.image_id);
+        if (status != VA_STATUS_SUCCESS)
+            CV_Error(cv::Error::StsError, "VA-API: vaDestroyImage failed");
+    }
+#endif  // !HAVE_VA
+}
+
+}} // namespace cv::va_intel
--- a/modules/core/src/vaapi.cpp
+++ b/modules/core/src/vaapi.cpp
@ -1,302 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html.
-
-// Copyright (C) 2015, Itseez, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-
-#include "precomp.hpp"
-
-#ifdef HAVE_VAAPI
-#else // HAVE_VAAPI
-#  define NO_VAAPI_SUPPORT_ERROR CV_ErrorNoReturn(cv::Error::StsBadFunc, "OpenCV was build without VA-API support")
-#endif // HAVE_VAAPI
-
-using namespace cv;
-
-////////////////////////////////////////////////////////////////////////
-// CL-VA Interoperability
-
-#ifdef HAVE_OPENCL
-#  include "opencv2/core/opencl/runtime/opencl_core.hpp"
-#  include "opencv2/core.hpp"
-#  include "opencv2/core/ocl.hpp"
-#  include "opencl_kernels_core.hpp"
-#else // HAVE_OPENCL
-#  define NO_OPENCL_SUPPORT_ERROR CV_ErrorNoReturn(cv::Error::StsBadFunc, "OpenCV was build without OpenCL support")
-#endif // HAVE_OPENCL
-
-#if defined(HAVE_VAAPI) && defined(HAVE_OPENCL)
-#  include <CL/va_ext.h>
-#endif // HAVE_VAAPI && HAVE_OPENCL
-
-namespace cv { namespace vaapi {
-
-#if defined(HAVE_VAAPI) && defined(HAVE_OPENCL)
-
-static clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn clGetDeviceIDsFromVA_APIMediaAdapterINTEL = NULL;
-static clCreateFromVA_APIMediaSurfaceINTEL_fn       clCreateFromVA_APIMediaSurfaceINTEL       = NULL;
-static clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn  clEnqueueAcquireVA_APIMediaSurfacesINTEL  = NULL;
-static clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn  clEnqueueReleaseVA_APIMediaSurfacesINTEL  = NULL;
-
-static bool contextInitialized = false;
-
-#endif // HAVE_VAAPI && HAVE_OPENCL
-
-namespace ocl {
-
-Context& initializeContextFromVA(VADisplay display)
-{
-    (void)display;
-#if !defined(HAVE_VAAPI)
-    NO_VAAPI_SUPPORT_ERROR;
-#elif !defined(HAVE_OPENCL)
-    NO_OPENCL_SUPPORT_ERROR;
-#else
-    contextInitialized = false;
-
-    cl_uint numPlatforms;
-    cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
-    if (status != CL_SUCCESS)
-        CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get number of platforms");
-    if (numPlatforms == 0)
-        CV_Error(cv::Error::OpenCLInitError, "OpenCL: No available platforms");
-
-    std::vector<cl_platform_id> platforms(numPlatforms);
-    status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL);
-    if (status != CL_SUCCESS)
-        CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get platform Id list");
-
-    // For CL-VA interop, we must find platform/device with "cl_intel_va_api_media_sharing" extension.
-    // With standard initialization procedure, we should examine platform extension string for that.
-    // But in practice, the platform ext string doesn't contain it, while device ext string does.
-    // Follow Intel procedure (see tutorial), we should obtain device IDs by extension call.
-    // Note that we must obtain function pointers using specific platform ID, and can't provide pointers in advance.
-    // So, we iterate and select the first platform, for which we got non-NULL pointers, device, and CL context.
-
-    int found = -1;
-    cl_context context = 0;
-    cl_device_id device = 0;
-
-    for (int i = 0; i < (int)numPlatforms; ++i)
-    {
-        // Get extension function pointers
-
-        clGetDeviceIDsFromVA_APIMediaAdapterINTEL = (clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)
-            clGetExtensionFunctionAddressForPlatform(platforms[i], "clGetDeviceIDsFromVA_APIMediaAdapterINTEL");
-        clCreateFromVA_APIMediaSurfaceINTEL       = (clCreateFromVA_APIMediaSurfaceINTEL_fn)
-            clGetExtensionFunctionAddressForPlatform(platforms[i], "clCreateFromVA_APIMediaSurfaceINTEL");
-        clEnqueueAcquireVA_APIMediaSurfacesINTEL  = (clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)
-            clGetExtensionFunctionAddressForPlatform(platforms[i], "clEnqueueAcquireVA_APIMediaSurfacesINTEL");
-        clEnqueueReleaseVA_APIMediaSurfacesINTEL  = (clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)
-            clGetExtensionFunctionAddressForPlatform(platforms[i], "clEnqueueReleaseVA_APIMediaSurfacesINTEL");
-
-        if (((void*)clGetDeviceIDsFromVA_APIMediaAdapterINTEL == NULL) ||
-            ((void*)clCreateFromVA_APIMediaSurfaceINTEL == NULL) ||
-            ((void*)clEnqueueAcquireVA_APIMediaSurfacesINTEL == NULL) ||
-            ((void*)clEnqueueReleaseVA_APIMediaSurfacesINTEL == NULL))
-        {
-            continue;
-        }
-
-        // Query device list
-
-        cl_uint numDevices = 0;
-
-        status = clGetDeviceIDsFromVA_APIMediaAdapterINTEL(platforms[i], CL_VA_API_DISPLAY_INTEL, display,
-                                                           CL_PREFERRED_DEVICES_FOR_VA_API_INTEL, 0, NULL, &numDevices);
-        if ((status != CL_SUCCESS) || !(numDevices > 0))
-            continue;
-        numDevices = 1; // initializeContextFromHandle() expects only 1 device
-        status = clGetDeviceIDsFromVA_APIMediaAdapterINTEL(platforms[i], CL_VA_API_DISPLAY_INTEL, display,
-                                                           CL_PREFERRED_DEVICES_FOR_VA_API_INTEL, numDevices, &device, NULL);
-        if (status != CL_SUCCESS)
-            continue;
-
-        // Creating CL-VA media sharing OpenCL context
-
-        cl_context_properties props[] = {
-            CL_CONTEXT_VA_API_DISPLAY_INTEL, (cl_context_properties) display,
-            CL_CONTEXT_INTEROP_USER_SYNC, CL_FALSE, // no explicit sync required
-            0
-        };
-
-        context = clCreateContext(props, numDevices, &device, NULL, NULL, &status);
-        if (status != CL_SUCCESS)
-        {
-            clReleaseDevice(device);
-        }
-        else
-        {
-            found = i;
-            break;
-        }
-    }
-
-    if (found < 0)
-        CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't create context for VA-API interop");
-
-    Context& ctx = Context::getDefault(false);
-    initializeContextFromHandle(ctx, platforms[found], context, device);
-    contextInitialized = true;
-    return ctx;
-#endif
-}
-
-#if defined(HAVE_VAAPI) && defined(HAVE_OPENCL)
-static bool ocl_convert_nv12_to_bgr(cl_mem clImageY, cl_mem clImageUV, cl_mem clBuffer, int step, int cols, int rows)
-{
-    ocl::Kernel k;
-    k.create("YUV2BGR_NV12_8u", cv::ocl::core::cvtclr_dx_oclsrc, "");
-    if (k.empty())
-        return false;
-
-    k.args(clImageY, clImageUV, clBuffer, step, cols, rows);
-
-    size_t globalsize[] = { cols, rows };
-    return k.run(2, globalsize, 0, false);
-}
-
-static bool ocl_convert_bgr_to_nv12(cl_mem clBuffer, int step, int cols, int rows, cl_mem clImageY, cl_mem clImageUV)
-{
-    ocl::Kernel k;
-    k.create("BGR2YUV_NV12_8u", cv::ocl::core::cvtclr_dx_oclsrc, "");
-    if (k.empty())
-        return false;
-
-    k.args(clBuffer, step, cols, rows, clImageY, clImageUV);
-
-    size_t globalsize[] = { cols, rows };
-    return k.run(2, globalsize, 0, false);
-}
-#endif // HAVE_VAAPI && HAVE_OPENCL
-
-} // namespace cv::vaapi::ocl
-
-void convertToVASurface(InputArray src, VASurfaceID surface, Size size)
-{
-    (void)src; (void)surface; (void)size;
-#if !defined(HAVE_VAAPI)
-    NO_VAAPI_SUPPORT_ERROR;
-#elif !defined(HAVE_OPENCL)
-    NO_OPENCL_SUPPORT_ERROR;
-#else
-    if (!contextInitialized)
-        CV_Error(cv::Error::OpenCLInitError, "OpenCL: Context for VA-API interop hasn't been created");
-
-    const int stype = CV_8UC4;
-
-    int srcType = src.type();
-    CV_Assert(srcType == stype);
-
-    Size srcSize = src.size();
-    CV_Assert(srcSize.width == size.width && srcSize.height == size.height);
-
-    UMat u = src.getUMat();
-
-    // TODO Add support for roi
-    CV_Assert(u.offset == 0);
-    CV_Assert(u.isContinuous());
-
-    cl_mem clBuffer = (cl_mem)u.handle(ACCESS_READ);
-
-    using namespace cv::ocl;
-    Context& ctx = Context::getDefault();
-    cl_context context = (cl_context)ctx.ptr();
-
-    cl_int status = 0;
-
-    cl_mem clImageY = clCreateFromVA_APIMediaSurfaceINTEL(context, CL_MEM_WRITE_ONLY, &surface, 0, &status);
-    if (status != CL_SUCCESS)
-        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromVA_APIMediaSurfaceINTEL failed (Y plane)");
-    cl_mem clImageUV = clCreateFromVA_APIMediaSurfaceINTEL(context, CL_MEM_WRITE_ONLY, &surface, 1, &status);
-    if (status != CL_SUCCESS)
-        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromVA_APIMediaSurfaceINTEL failed (UV plane)");
-
-    cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
-
-    cl_mem images[2] = { clImageY, clImageUV };
-    status = clEnqueueAcquireVA_APIMediaSurfacesINTEL(q, 2, images, 0, NULL, NULL);
-    if (status != CL_SUCCESS)
-        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireVA_APIMediaSurfacesINTEL failed");
-    if (!ocl::ocl_convert_bgr_to_nv12(clBuffer, (int)u.step[0], u.cols, u.rows, clImageY, clImageUV))
-        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: ocl_convert_bgr_to_nv12 failed");
-    clEnqueueReleaseVA_APIMediaSurfacesINTEL(q, 2, images, 0, NULL, NULL);
-    if (status != CL_SUCCESS)
-        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseVA_APIMediaSurfacesINTEL failed");
-
-    status = clFinish(q); // TODO Use events
-    if (status != CL_SUCCESS)
-        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clFinish failed");
-
-    status = clReleaseMemObject(clImageY); // TODO RAII
-    if (status != CL_SUCCESS)
-        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clReleaseMem failed (Y plane)");
-    status = clReleaseMemObject(clImageUV);
-    if (status != CL_SUCCESS)
-        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clReleaseMem failed (UV plane)");
-#endif
-}
-
-void convertFromVASurface(VASurfaceID surface, Size size, OutputArray dst)
-{
-    (void)surface; (void)dst; (void)size;
-#if !defined(HAVE_VAAPI)
-    NO_VAAPI_SUPPORT_ERROR;
-#elif !defined(HAVE_OPENCL)
-    NO_OPENCL_SUPPORT_ERROR;
-#else
-    if (!contextInitialized)
-        CV_Error(cv::Error::OpenCLInitError, "OpenCL: Context for VA-API interop hasn't been created");
-
-    const int dtype = CV_8UC4;
-
-    // TODO Need to specify ACCESS_WRITE here somehow to prevent useless data copying!
-    dst.create(size, dtype);
-    UMat u = dst.getUMat();
-
-    // TODO Add support for roi
-    CV_Assert(u.offset == 0);
-    CV_Assert(u.isContinuous());
-
-    cl_mem clBuffer = (cl_mem)u.handle(ACCESS_WRITE);
-
-    using namespace cv::ocl;
-    Context& ctx = Context::getDefault();
-    cl_context context = (cl_context)ctx.ptr();
-
-    cl_int status = 0;
-
-    cl_mem clImageY = clCreateFromVA_APIMediaSurfaceINTEL(context, CL_MEM_READ_ONLY, &surface, 0, &status);
-    if (status != CL_SUCCESS)
-        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromVA_APIMediaSurfaceINTEL failed (Y plane)");
-    cl_mem clImageUV = clCreateFromVA_APIMediaSurfaceINTEL(context, CL_MEM_READ_ONLY, &surface, 1, &status);
-    if (status != CL_SUCCESS)
-        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromVA_APIMediaSurfaceINTEL failed (UV plane)");
-
-    cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
-
-    cl_mem images[2] = { clImageY, clImageUV };
-    status = clEnqueueAcquireVA_APIMediaSurfacesINTEL(q, 2, images, 0, NULL, NULL);
-    if (status != CL_SUCCESS)
-        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireVA_APIMediaSurfacesINTEL failed");
-    if (!ocl::ocl_convert_nv12_to_bgr(clImageY, clImageUV, clBuffer, (int)u.step[0], u.cols, u.rows))
-        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: ocl_convert_nv12_to_bgr failed");
-    status = clEnqueueReleaseVA_APIMediaSurfacesINTEL(q, 2, images, 0, NULL, NULL);
-    if (status != CL_SUCCESS)
-        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseVA_APIMediaSurfacesINTEL failed");
-
-    status = clFinish(q); // TODO Use events
-    if (status != CL_SUCCESS)
-        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clFinish failed");
-
-    status = clReleaseMemObject(clImageY); // TODO RAII
-    if (status != CL_SUCCESS)
-        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clReleaseMem failed (Y plane)");
-    status = clReleaseMemObject(clImageUV);
-    if (status != CL_SUCCESS)
-        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clReleaseMem failed (UV plane)");
-#endif
-}
-
-}} // namespace cv::vaapi
--- a/modules/core/test/test_arithm.cpp
+++ b/modules/core/test/test_arithm.cpp
@ -1792,7 +1792,6 @@ INSTANTIATE_TEST_CASE_P(Arithm, SubtractOutputMatNotEmpty, testing::Combine(
    testing::Values(-1, CV_16S, CV_32S, CV_32F),
    testing::Bool()));

-
 TEST(Core_FindNonZero, singular)
 {
    Mat img(10, 10, CV_8U, Scalar::all(0));
@ -1816,3 +1815,21 @@ TEST(Core_BoolVector, support)
    ASSERT_EQ( nz, countNonZero(test) );
    ASSERT_FLOAT_EQ((float)nz/n, (float)(mean(test)[0]));
 }
+
+// minMaxLoc on a 50x50 int matrix filled with INT_MAX and an empty mask:
+// both extrema must equal INT_MAX and both locations must be the first element.
+TEST(MinMaxLoc, Mat_IntMax_Without_Mask)
+{
+    const int iMaxVal = std::numeric_limits<int>::max();
+
+    Mat_<int> mat(50, 50);
+    mat.setTo(iMaxVal);
+
+    double min, max;
+    Point minLoc, maxLoc;
+    minMaxLoc(mat, &min, &max, &minLoc, &maxLoc, Mat());
+
+    // Values first, then positions.
+    ASSERT_EQ(iMaxVal, min);
+    ASSERT_EQ(iMaxVal, max);
+
+    const Point origin(0, 0);
+    ASSERT_EQ(origin, minLoc);
+    ASSERT_EQ(origin, maxLoc);
+}
--- a/modules/core/test/test_ds.cpp
+++ b/modules/core/test/test_ds.cpp
@ -491,6 +491,7 @@ class Core_SeqBaseTest : public Core_DynStructBaseTest
 {
 public:
    Core_SeqBaseTest();
+    virtual ~Core_SeqBaseTest();
    void clear();
    void run( int );

@ -501,11 +502,14 @@ protected:
    int test_seq_ops( int iters );
 };

-
 Core_SeqBaseTest::Core_SeqBaseTest()
 {
 }

+// Runs clear() when the test object is destroyed, so cleanup does not depend on an explicit call.
+Core_SeqBaseTest::~Core_SeqBaseTest()
+{
+    clear();
+}

 void Core_SeqBaseTest::clear()
 {
@ -1206,6 +1210,7 @@ class Core_SetTest : public Core_DynStructBaseTest
 {
 public:
    Core_SetTest();
+    virtual ~Core_SetTest();
    void clear();
    void run( int );

@ -1219,6 +1224,10 @@ Core_SetTest::Core_SetTest()
 {
 }

+// Runs clear() when the test object is destroyed, so cleanup does not depend on an explicit call.
+Core_SetTest::~Core_SetTest()
+{
+    clear();
+}

 void Core_SetTest::clear()
 {
@ -1417,6 +1426,7 @@ class Core_GraphTest : public Core_DynStructBaseTest
 {
 public:
    Core_GraphTest();
+    virtual ~Core_GraphTest();
    void clear();
    void run( int );

@ -1430,6 +1440,10 @@ Core_GraphTest::Core_GraphTest()
 {
 }

+// Runs clear() when the test object is destroyed, so cleanup does not depend on an explicit call.
+Core_GraphTest::~Core_GraphTest()
+{
+    clear();
+}

 void Core_GraphTest::clear()
 {
@ -2042,6 +2056,8 @@ void Core_GraphScanTest::run( int )
                CV_TS_SEQ_CHECK_CONDITION( vtx_count == 0 && edge_count == 0,
                                          "Not every vertex/edge has been visited" );
                update_progressbar();
+
+                cvReleaseGraphScanner( &scanner );
            }

            // for a random graph the test just checks that every graph vertex and
@ -2106,8 +2122,6 @@ void Core_GraphScanTest::run( int )
    catch(int)
    {
    }
-
-    cvReleaseGraphScanner( &scanner );
 }


--- a/modules/core/test/test_umat.cpp
+++ b/modules/core/test/test_umat.cpp
@ -243,9 +243,11 @@ TEST_P(UMatBasicTests, GetUMat)
        EXPECT_MAT_NEAR(ub, ua, 0);
    }
    {
-        Mat b;
-        b = a.getUMat(ACCESS_RW).getMat(ACCESS_RW);
-        EXPECT_MAT_NEAR(b, a, 0);
+        UMat u = a.getUMat(ACCESS_RW);
+        {
+            Mat b = u.getMat(ACCESS_RW);
+            EXPECT_MAT_NEAR(b, a, 0);
+        }
    }
    {
        Mat b;
@ -253,13 +255,15 @@ TEST_P(UMatBasicTests, GetUMat)
        EXPECT_MAT_NEAR(b, a, 0);
    }
    {
-        UMat ub;
-        ub = ua.getMat(ACCESS_RW).getUMat(ACCESS_RW);
-        EXPECT_MAT_NEAR(ub, ua, 0);
+        Mat m = ua.getMat(ACCESS_RW);
+        {
+            UMat ub = m.getUMat(ACCESS_RW);
+            EXPECT_MAT_NEAR(ub, ua, 0);
+        }
    }
 }

-INSTANTIATE_TEST_CASE_P(UMat, UMatBasicTests, Combine(testing::Values(CV_8U), testing::Values(1, 2),
+INSTANTIATE_TEST_CASE_P(UMat, UMatBasicTests, Combine(testing::Values(CV_8U, CV_64F), testing::Values(1, 2),
    testing::Values(cv::Size(1, 1), cv::Size(1, 128), cv::Size(128, 1), cv::Size(128, 128), cv::Size(640, 480)), Bool()));

 //////////////////////////////////////////////////////////////// Reshape ////////////////////////////////////////////////////////////////////////
@ -1080,7 +1084,7 @@ TEST(UMat, unmap_in_class)
                Mat dst;
                m.convertTo(dst, CV_32FC1);
                // some additional CPU-based per-pixel processing into dst
-                intermediateResult = dst.getUMat(ACCESS_READ);
+                intermediateResult = dst.getUMat(ACCESS_READ); // this violates lifetime of base(dst) / derived (intermediateResult) objects. Use copyTo?
                std::cout << "data processed..." << std::endl;
            } // problem is here: dst::~Mat()
            std::cout << "leave ProcessData()" << std::endl;
@ -1268,5 +1272,69 @@ TEST(UMat, DISABLED_Test_same_behaviour_write_and_write)
    ASSERT_TRUE(exceptionDetected); // data race
 }

+// Writes through a reshaped Mat view of a UMat and checks that, once the view is gone,
+// the UMat itself holds the written value everywhere.
+TEST(UMat, mat_umat_sync)
+{
+    UMat u(10, 10, CV_8UC1, Scalar(1));
+    {
+        // The Mat returned by getMat() is a temporary; only its reshaped copy `m` is kept.
+        Mat m = u.getMat(ACCESS_RW).reshape(1);
+        m.setTo(Scalar(255));
+    }
+
+    // uDiff marks elements that differ from 255; none are expected.
+    UMat uDiff;
+    compare(u, 255, uDiff, CMP_NE);
+    ASSERT_EQ(0, countNonZero(uDiff));
+}
+
+// Obtains a UMat through a temporary Mat mapping of `u`, writes through it, and checks that
+// `u` holds the written value everywhere after the derived object is destroyed.
+TEST(UMat, testTempObjects_UMat)
+{
+    UMat u(10, 10, CV_8UC1, Scalar(1));
+    {
+        // The intermediate Mat from getMat() exists only for the duration of this statement.
+        UMat u2 = u.getMat(ACCESS_RW).getUMat(ACCESS_RW);
+        u2.setTo(Scalar(255));
+    }
+
+    // uDiff marks elements that differ from 255; none are expected.
+    UMat uDiff;
+    compare(u, 255, uDiff, CMP_NE);
+    ASSERT_EQ(0, countNonZero(uDiff));
+}
+
+// Disabled due to failure in VS 2015:
+//  C++11 is enabled by default ==>
+//  destructors have implicit 'noexcept(true)' specifier ==>
+//  throwing exception from destructor is not handled correctly
+#if defined(_MSC_VER) && _MSC_VER >= 1900 /* MSVC 14 */
+TEST(UMat, DISABLED_testTempObjects_Mat)
+#else
+TEST(UMat, testTempObjects_Mat)
+#endif
+{
+    Mat m(10, 10, CV_8UC1, Scalar(1));
+    {
+        Mat m2;
+        // Mapping a temporary UMat back to a Mat is expected to throw somewhere in this statement.
+        ASSERT_ANY_THROW(m2 = m.getUMat(ACCESS_RW).getMat(ACCESS_RW));
+    }
+}
+
+// Releases the base UMat before the UMat derived from it. The test has no assertions:
+// it only has to run to completion with this release order.
+TEST(UMat, testWrongLifetime_UMat)
+{
+    UMat u(10, 10, CV_8UC1, Scalar(1));
+    {
+        UMat u2 = u.getMat(ACCESS_RW).getUMat(ACCESS_RW);
+        u.release(); // base object
+        u2.release(); // derived object, should show warning message
+    }
+}
+
+// Releases the base Mat first, then a Mat mapped from the derived UMat, then the derived UMat.
+// The test has no assertions: it only has to run to completion with this release order.
+TEST(UMat, testWrongLifetime_Mat)
+{
+    Mat m(10, 10, CV_8UC1, Scalar(1));
+    {
+        UMat u = m.getUMat(ACCESS_RW);
+        Mat m2 = u.getMat(ACCESS_RW);
+        m.release(); // base object
+        m2.release(); // map of derived object
+        u.release(); // derived object, should show warning message
+    }
+}

 } } // namespace cvtest::ocl
--- a/modules/core/test/test_utils.cpp
+++ b/modules/core/test/test_utils.cpp
@ -0,0 +1,221 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#include "test_precomp.hpp"
+
+using namespace cv;
+
+namespace {
+
+static const char * const keys =  // option table used by the flag and bool tests; entries appear to read { names | default value | help text }
+    "{ h help    |       | print help }"
+    "{ i info    | false | print info }"
+    "{ t true    | true  | true value }"
+    "{ n unused  |       | dummy }"  // never passed on any test command line in this file
+;
+
+TEST(CommandLineParser, testFailure)  // queries that do not match the declared keys are expected to throw
+{
+    const char* argv[] = {"<bin>", "-q"};  // "q" is not declared in keys
+    const int argc = 2;  // must match the number of entries in argv
+    cv::CommandLineParser parser(argc, argv, keys);
+    EXPECT_ANY_THROW(parser.has("q"));
+    EXPECT_ANY_THROW(parser.get<bool>("q"));
+    EXPECT_ANY_THROW(parser.get<bool>(0));  // keys declares no positional ("@") entries
+
+    parser.get<bool>("h");
+    EXPECT_FALSE(parser.check());  // check() is expected to return false at this point
+}
+TEST(CommandLineParser, testHas_noValues)  // flags passed without a value: has() is expected to be true under both the short and long name
+{
+    const char* argv[] = {"<bin>", "-h", "--info"};
+    const int argc = 3;  // must match the number of entries in argv
+    cv::CommandLineParser parser(argc, argv, keys);
+    EXPECT_TRUE(parser.has("help"));
+    EXPECT_TRUE(parser.has("h"));
+    EXPECT_TRUE(parser.has("info"));
+    EXPECT_TRUE(parser.has("i"));
+    EXPECT_FALSE(parser.has("n"));  // "n"/"unused" was not passed
+    EXPECT_FALSE(parser.has("unused"));
+}
+TEST(CommandLineParser, testHas_TrueValues)  // flags passed as "=TRUE" / "=true": has() is expected to be true under both names
+{
+    const char* argv[] = {"<bin>", "-h=TRUE", "--info=true"};
+    const int argc = 3;  // must match the number of entries in argv
+    cv::CommandLineParser parser(argc, argv, keys);
+    EXPECT_TRUE(parser.has("help"));
+    EXPECT_TRUE(parser.has("h"));
+    EXPECT_TRUE(parser.has("info"));
+    EXPECT_TRUE(parser.has("i"));
+    EXPECT_FALSE(parser.has("n"));  // "n"/"unused" was not passed
+    EXPECT_FALSE(parser.has("unused"));
+}
+TEST(CommandLineParser, testHas_TrueValues1)  // flags passed as "=1": has() is expected to be true under both names
+{
+    const char* argv[] = {"<bin>", "-h=1", "--info=1"};
+    const int argc = 3;  // must match the number of entries in argv
+    cv::CommandLineParser parser(argc, argv, keys);
+    EXPECT_TRUE(parser.has("help"));
+    EXPECT_TRUE(parser.has("h"));
+    EXPECT_TRUE(parser.has("info"));
+    EXPECT_TRUE(parser.has("i"));
+    EXPECT_FALSE(parser.has("n"));  // "n"/"unused" was not passed
+    EXPECT_FALSE(parser.has("unused"));
+}
+TEST(CommandLineParser, testHas_FalseValues0)  // flags passed as "=0": has() is still expected to be true, since the test checks presence and not value
+{
+    const char* argv[] = {"<bin>", "-h=0", "--info=0"};
+    const int argc = 3;  // must match the number of entries in argv
+    cv::CommandLineParser parser(argc, argv, keys);
+    EXPECT_TRUE(parser.has("help"));
+    EXPECT_TRUE(parser.has("h"));
+    EXPECT_TRUE(parser.has("info"));
+    EXPECT_TRUE(parser.has("i"));
+    EXPECT_FALSE(parser.has("n"));  // "n"/"unused" was not passed
+    EXPECT_FALSE(parser.has("unused"));
+}
+
+TEST(CommandLineParser, testBoolOption_noArgs)  // with no arguments, get<bool>() is expected to reflect the defaults declared in keys
+{
+    const char* argv[] = {"<bin>"};
+    const int argc = 1;  // must match the number of entries in argv
+    cv::CommandLineParser parser(argc, argv, keys);
+    EXPECT_FALSE(parser.get<bool>("help"));  // "help" has an empty default in keys
+    EXPECT_FALSE(parser.get<bool>("h"));
+    EXPECT_FALSE(parser.get<bool>("info"));  // "info" has the default "false" in keys
+    EXPECT_FALSE(parser.get<bool>("i"));
+    EXPECT_TRUE(parser.get<bool>("true")); // default is true
+    EXPECT_TRUE(parser.get<bool>("t"));
+}
+
+TEST(CommandLineParser, testBoolOption_noValues)  // a flag passed without a value is expected to read back as true
+{
+    const char* argv[] = {"<bin>", "-h", "--info"};
+    const int argc = 3;  // must match the number of entries in argv
+    cv::CommandLineParser parser(argc, argv, keys);
+    EXPECT_TRUE(parser.get<bool>("help"));
+    EXPECT_TRUE(parser.get<bool>("h"));
+    EXPECT_TRUE(parser.get<bool>("info"));
+    EXPECT_TRUE(parser.get<bool>("i"));
+}
+
+TEST(CommandLineParser, testBoolOption_TrueValues)  // "--info=true" is expected to read back as true; the unset "unused" key as false
+{
+    const char* argv[] = {"<bin>", "-h=TRUE", "--info=true"};
+    const int argc = 3;  // must match the number of entries in argv
+    cv::CommandLineParser parser(argc, argv, keys);
+    //EXPECT_TRUE(parser.get<bool>("help"));
+    //EXPECT_TRUE(parser.get<bool>("h"));
+    EXPECT_TRUE(parser.get<bool>("info"));
+    EXPECT_TRUE(parser.get<bool>("i"));
+    EXPECT_FALSE(parser.get<bool>("unused"));
+    EXPECT_FALSE(parser.get<bool>("n"));
+}
+
+TEST(CommandLineParser, testBoolOption_FalseValues)  // "=FALSE" / "=false" are expected to read back as false under both names
+{
+    const char* argv[] = {"<bin>", "--help=FALSE", "-i=false"};
+    const int argc = 3;  // must match the number of entries in argv
+    cv::CommandLineParser parser(argc, argv, keys);
+    EXPECT_FALSE(parser.get<bool>("help"));
+    EXPECT_FALSE(parser.get<bool>("h"));
+    EXPECT_FALSE(parser.get<bool>("info"));
+    EXPECT_FALSE(parser.get<bool>("i"));
+}
+
+
+static const char * const keys2 =  // option table used by the positional tests; "@"-prefixed entries are read back by name or by index
+    "{ h help    |          | print help }"
+    "{ @arg1     | default1 | param1 }"  // read back as index 0 in the tests
+    "{ @arg2     |          | param2 }"  // read back as index 1 in the tests
+    "{ n unused  |          | dummy }"
+;
+
+TEST(CommandLineParser, testPositional_noArgs)  // with no arguments, positional parameters are expected to fall back to the defaults in keys2
+{
+    const char* argv[] = {"<bin>"};
+    const int argc = 1;  // must match the number of entries in argv
+    cv::CommandLineParser parser(argc, argv, keys2);
+    EXPECT_TRUE(parser.has("@arg1"));  // @arg1 declares the default "default1"
+    EXPECT_FALSE(parser.has("@arg2"));  // @arg2 declares an empty default
+    EXPECT_EQ("default1", parser.get<String>("@arg1"));
+    EXPECT_EQ("default1", parser.get<String>(0));  // same parameter, addressed by index
+
+    EXPECT_EQ("", parser.get<String>("@arg2"));
+    EXPECT_EQ("", parser.get<String>(1));  // same parameter, addressed by index
+}
+
+TEST(CommandLineParser, testPositional_default)  // two bare arguments are expected to fill @arg1 and @arg2 in order
+{
+    const char* argv[] = {"<bin>", "test1", "test2"};
+    const int argc = 3;  // must match the number of entries in argv
+    cv::CommandLineParser parser(argc, argv, keys2);
+    EXPECT_TRUE(parser.has("@arg1"));
+    EXPECT_TRUE(parser.has("@arg2"));
+    EXPECT_EQ("test1", parser.get<String>("@arg1"));
+    EXPECT_EQ("test2", parser.get<String>("@arg2"));
+    EXPECT_EQ("test1", parser.get<String>(0));  // lookup by index is expected to match lookup by name
+    EXPECT_EQ("test2", parser.get<String>(1));
+}
+
+TEST(CommandLineParser, testPositional_withFlagsBefore)  // a flag placed before the positional arguments must not shift their indices
+{
+    const char* argv[] = {"<bin>", "-h", "test1", "test2"};
+    const int argc = 4;  // must match the number of entries in argv
+    cv::CommandLineParser parser(argc, argv, keys2);
+    EXPECT_TRUE(parser.has("@arg1"));
+    EXPECT_TRUE(parser.has("@arg2"));
+    EXPECT_EQ("test1", parser.get<String>("@arg1"));
+    EXPECT_EQ("test2", parser.get<String>("@arg2"));
+    EXPECT_EQ("test1", parser.get<String>(0));  // lookup by index is expected to match lookup by name
+    EXPECT_EQ("test2", parser.get<String>(1));
+}
+
+TEST(CommandLineParser, testPositional_withFlagsAfter)  // a flag placed after the positional arguments must not affect them
+{
+    const char* argv[] = {"<bin>", "test1", "test2", "-h"};
+    const int argc = 4;  // must match the number of entries in argv
+    cv::CommandLineParser parser(argc, argv, keys2);
+    EXPECT_TRUE(parser.has("@arg1"));
+    EXPECT_TRUE(parser.has("@arg2"));
+    EXPECT_EQ("test1", parser.get<String>("@arg1"));
+    EXPECT_EQ("test2", parser.get<String>("@arg2"));
+    EXPECT_EQ("test1", parser.get<String>(0));  // lookup by index is expected to match lookup by name
+    EXPECT_EQ("test2", parser.get<String>(1));
+}
+
+TEST(CommandLineParser, testEmptyStringValue)  // contrasts an empty default with a "<none>" default when no arguments are given
+{
+    static const char * const keys3 =  // local option table; shadows nothing at file scope
+            "{ @pos0 |        | empty default value }"
+            "{ @pos1 | <none> | forbid empty default value }";
+
+    const char* argv[] = {"<bin>"};
+    const int argc = 1;  // must match the number of entries in argv
+    cv::CommandLineParser parser(argc, argv, keys3);
+    // EXPECT_TRUE(parser.has("@pos0"));
+    EXPECT_EQ("", parser.get<String>("@pos0"));  // empty default reads back as an empty string
+    EXPECT_TRUE(parser.check());  // reading @pos0 is expected to leave the parser without errors
+
+    EXPECT_FALSE(parser.has("@pos1"));
+    parser.get<String>(1);  // index 1 is @pos1, declared with the "<none>" default
+    EXPECT_FALSE(parser.check());  // check() is expected to return false after reading @pos1
+}
+
+TEST(CommandLineParser, positional_regression_5074_equal_sign)  // values containing '=' must be returned intact
+{
+    static const char * const keys3 =  // one positional and one named parameter, both with empty defaults
+            "{ @eq0 |  | }"
+            "{ eq1  |  | }";
+
+    const char* argv[] = {"<bin>", "1=0", "--eq1=1=0"};
+    const int argc = 3;  // must match the number of entries in argv
+    cv::CommandLineParser parser(argc, argv, keys3);
+    EXPECT_EQ("1=0", parser.get<String>("@eq0"));  // positional argument that itself contains '='
+    EXPECT_EQ("1=0", parser.get<String>(0));
+    EXPECT_EQ("1=0", parser.get<String>("eq1"));  // expected value is everything after the first '=' in "--eq1=1=0"
+    EXPECT_TRUE(parser.check());
+}
+
+} // namespace
--- a/modules/cudaarithm/src/cuda/normalize.cu
+++ b/modules/cudaarithm/src/cuda/normalize.cu
@ -249,6 +249,10 @@ void cv::cuda::normalize(InputArray _src, OutputArray _dst, double a, double b,
    CV_Assert( src.channels() == 1 );
    CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );

+    if (dtype < 0)
+    {
+        dtype = _dst.fixedType() ? _dst.type() : src.type();
+    }
    dtype = CV_MAT_DEPTH(dtype);

    const int src_depth = src.depth();
--- a/modules/cudaarithm/test/test_reductions.cpp
+++ b/modules/cudaarithm/test/test_reductions.cpp
@ -951,11 +951,11 @@ CUDA_TEST_P(Normalize, WithMask)

    cv::cuda::GpuMat dst = createMat(size, type, useRoi);
    dst.setTo(cv::Scalar::all(0));
-    cv::cuda::normalize(loadMat(src, useRoi), dst, alpha, beta, norm_type, type, loadMat(mask, useRoi));
+    cv::cuda::normalize(loadMat(src, useRoi), dst, alpha, beta, norm_type, -1, loadMat(mask, useRoi));

    cv::Mat dst_gold(size, type);
    dst_gold.setTo(cv::Scalar::all(0));
-    cv::normalize(src, dst_gold, alpha, beta, norm_type, type, mask);
+    cv::normalize(src, dst_gold, alpha, beta, norm_type, -1, mask);

    EXPECT_MAT_NEAR(dst_gold, dst, type < CV_32F ? 1.0 : 1e-4);
 }
--- a/modules/cudaimgproc/src/cuda/bilateral_filter.cu
+++ b/modules/cudaimgproc/src/cuda/bilateral_filter.cu
@ -136,7 +136,7 @@ namespace cv { namespace cuda { namespace device
            float sigma_color2_inv_half = -0.5f/(sigma_color * sigma_color);

            cudaSafeCall( cudaFuncSetCacheConfig (bilateral_kernel<T, B<T> >, cudaFuncCachePreferL1) );
-            bilateral_kernel<<<grid, block>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, kernel_size, sigma_spatial2_inv_half, sigma_color2_inv_half);
+            bilateral_kernel<<<grid, block, 0, stream>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, kernel_size, sigma_spatial2_inv_half, sigma_color2_inv_half);
            cudaSafeCall ( cudaGetLastError () );

            if (stream == 0)
--- a/modules/cudev/test/CMakeLists.txt
+++ b/modules/cudev/test/CMakeLists.txt
@ -32,6 +32,10 @@ if(OCV_DEPENDENCIES_FOUND)
  ocv_target_link_libraries(${the_target} ${test_deps} ${OPENCV_LINKER_LIBS} ${CUDA_LIBRARIES})
  add_dependencies(opencv_tests ${the_target})

+  set_target_properties(${the_target} PROPERTIES LABELS "${OPENCV_MODULE_${the_module}_LABEL}")
+  set_source_files_properties(${OPENCV_TEST_${the_module}_SOURCES} ${${the_target}_pch}
+    PROPERTIES LABELS "${OPENCV_MODULE_${the_module}_LABEL};AccuracyTest")
+
  # Additional target properties
  set_target_properties(${the_target} PROPERTIES
    DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
@ -42,9 +46,7 @@ if(OCV_DEPENDENCIES_FOUND)
    set_target_properties(${the_target} PROPERTIES FOLDER "tests accuracy")
  endif()

-  enable_testing()
-  get_target_property(LOC ${the_target} LOCATION)
-  add_test(${the_target} "${LOC}")
+  ocv_add_test_from_target("${the_target}" "Accuracy" "${the_target}")

  if(INSTALL_TESTS)
    install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_TEST_INSTALL_PATH} COMPONENT tests)
--- a/modules/features2d/src/agast.cpp
+++ b/modules/features2d/src/agast.cpp
@ -7511,19 +7511,22 @@ Ptr<AgastFeatureDetector> AgastFeatureDetector::create( int threshold, bool nonm

 void AGAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression, int type)
 {
+
+    std::vector<KeyPoint> kpts;
+
    // detect
    switch(type) {
      case AgastFeatureDetector::AGAST_5_8:
-        AGAST_5_8(_img, keypoints, threshold);
+        AGAST_5_8(_img, kpts, threshold);
        break;
      case AgastFeatureDetector::AGAST_7_12d:
-        AGAST_7_12d(_img, keypoints, threshold);
+        AGAST_7_12d(_img, kpts, threshold);
        break;
      case AgastFeatureDetector::AGAST_7_12s:
-        AGAST_7_12s(_img, keypoints, threshold);
+        AGAST_7_12s(_img, kpts, threshold);
        break;
      case AgastFeatureDetector::OAST_9_16:
-        OAST_9_16(_img, keypoints, threshold);
+        OAST_9_16(_img, kpts, threshold);
        break;
    }

@ -7534,7 +7537,7 @@ void AGAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, boo
    makeAgastOffsets(pixel_, (int)img.step, type);

    std::vector<KeyPoint>::iterator kpt;
-    for(kpt = keypoints.begin(); kpt != keypoints.end(); kpt++)
+    for(kpt = kpts.begin(); kpt != kpts.end(); kpt++)
    {
        switch(type) {
          case AgastFeatureDetector::AGAST_5_8:
@ -7555,20 +7558,21 @@ void AGAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, boo
            break;
        }
    }
+
    // suppression
    if(nonmax_suppression)
    {
        size_t j;
        size_t curr_idx;
        size_t lastRow = 0, next_lastRow = 0;
-        size_t num_Corners = keypoints.size();
+        size_t num_Corners = kpts.size();
        size_t lastRowCorner_ind = 0, next_lastRowCorner_ind = 0;

        std::vector<int> nmsFlags;
        std::vector<KeyPoint>::iterator currCorner_nms;
        std::vector<KeyPoint>::const_iterator currCorner;

-        currCorner = keypoints.begin();
+        currCorner = kpts.begin();

        nmsFlags.resize((int)num_Corners);

@ -7593,11 +7597,11 @@ void AGAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, boo
            if(lastRow + 1 == currCorner->pt.y)
            {
                // find the corner above the current one
-                while( (keypoints[lastRowCorner_ind].pt.x < currCorner->pt.x)
-                    && (keypoints[lastRowCorner_ind].pt.y == lastRow) )
+                while( (kpts[lastRowCorner_ind].pt.x < currCorner->pt.x)
+                    && (kpts[lastRowCorner_ind].pt.y == lastRow) )
                    lastRowCorner_ind++;

-                    if( (keypoints[lastRowCorner_ind].pt.x == currCorner->pt.x)
+                    if( (kpts[lastRowCorner_ind].pt.x == currCorner->pt.x)
                     && (lastRowCorner_ind != curr_idx) )
                    {
                        size_t w = lastRowCorner_ind;
@ -7605,7 +7609,7 @@ void AGAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, boo
                        while(nmsFlags[w] != -1)
                            w = nmsFlags[w];

-                        if(keypoints[curr_idx].response < keypoints[w].response)
+                        if(kpts[curr_idx].response < kpts[w].response)
                            nmsFlags[curr_idx] = (int)w;
                        else
                            nmsFlags[w] = (int)curr_idx;
@ -7614,8 +7618,8 @@ void AGAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, boo

            // check left
            t = (int)curr_idx - 1;
-            if( (curr_idx != 0) && (keypoints[t].pt.y == currCorner->pt.y)
-             && (keypoints[t].pt.x + 1 == currCorner->pt.x) )
+            if( (curr_idx != 0) && (kpts[t].pt.y == currCorner->pt.y)
+             && (kpts[t].pt.x + 1 == currCorner->pt.x) )
            {
                int currCornerMaxAbove_ind = nmsFlags[curr_idx];
                // find the maximum in that area
@ -7626,7 +7630,7 @@ void AGAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, boo
                {
                    if((size_t)t != curr_idx)
                    {
-                        if ( keypoints[curr_idx].response < keypoints[t].response )
+                        if ( kpts[curr_idx].response < kpts[t].response )
                            nmsFlags[curr_idx] = t;
                        else
                            nmsFlags[t] = (int)curr_idx;
@ -7636,7 +7640,7 @@ void AGAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, boo
                {
                    if(t != currCornerMaxAbove_ind)
                    {
-                        if(keypoints[currCornerMaxAbove_ind].response < keypoints[t].response)
+                        if(kpts[currCornerMaxAbove_ind].response < kpts[t].response)
                        {
                            nmsFlags[currCornerMaxAbove_ind] = t;
                            nmsFlags[curr_idx] = t;
@ -7652,19 +7656,15 @@ void AGAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, boo
            currCorner++;
        }

-        // marks non-maximum corners
+        // collecting maximum corners
        for(curr_idx = 0; curr_idx < num_Corners; curr_idx++)
        {
-            if (nmsFlags[curr_idx] != -1)
-                keypoints[curr_idx].response = -1;
-        }
-
-        // erase non-maximum corners
-        for (j = keypoints.size(); j > 0; j--)
-        {
-            if (keypoints[j - 1].response == -1)
-                keypoints.erase(keypoints.begin() + j - 1 );
+            if (nmsFlags[curr_idx] == -1)
+                keypoints.push_back(kpts[curr_idx]);
        }
+    } else
+    {
+      keypoints = kpts;
    }
 }

--- a/modules/features2d/src/fast.cpp
+++ b/modules/features2d/src/fast.cpp
@ -262,7 +262,7 @@ static bool ocl_FAST( InputArray _img, std::vector<KeyPoint>& keypoints,
    UMat img = _img.getUMat();
    if( img.cols < 7 || img.rows < 7 )
        return false;
-    size_t globalsize[] = { img.cols-6, img.rows-6 };
+    size_t globalsize[] = { (size_t)img.cols-6, (size_t)img.rows-6 };

    ocl::Kernel fastKptKernel("FAST_findKeypoints", ocl::features2d::fast_oclsrc);
    if (fastKptKernel.empty())
@ -306,7 +306,7 @@ static bool ocl_FAST( InputArray _img, std::vector<KeyPoint>& keypoints,
        if (fastNMSKernel.empty())
            return false;

-        size_t globalsize_nms[] = { counter };
+        size_t globalsize_nms[] = { (size_t)counter };
        if( !fastNMSKernel.args(ocl::KernelArg::PtrReadOnly(kp1),
                                ocl::KernelArg::PtrReadWrite(kp2),
                                ocl::KernelArg::ReadOnly(img),
--- a/modules/features2d/src/main.cpp
+++ b/modules/features2d/src/main.cpp
@ -0,0 +1,52 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+//
+// Library initialization file
+//
+
+#include "precomp.hpp"
+
+IPP_INITIALIZER_AUTO
+
+/* End of file. */
--- a/modules/features2d/src/matchers.cpp
+++ b/modules/features2d/src/matchers.cpp
@ -97,8 +97,8 @@ static bool ocl_matchSingle(InputArray query, InputArray train,
    if(k.empty())
        return false;

-    size_t globalSize[] = {(query.size().height + block_size - 1) / block_size * block_size, block_size};
-    size_t localSize[] = {block_size, block_size};
+    size_t globalSize[] = {((size_t)query.size().height + block_size - 1) / block_size * block_size, (size_t)block_size};
+    size_t localSize[] = {(size_t)block_size, (size_t)block_size};

    int idx = 0;
    idx = k.set(idx, ocl::KernelArg::PtrReadOnly(uquery));
@ -197,8 +197,8 @@ static bool ocl_knnMatchSingle(InputArray query, InputArray train, UMat &trainId
    if(k.empty())
        return false;

-    size_t globalSize[] = {(query_rows + block_size - 1) / block_size * block_size, block_size};
-    size_t localSize[] = {block_size, block_size};
+    size_t globalSize[] = {((size_t)query_rows + block_size - 1) / block_size * block_size, (size_t)block_size};
+    size_t localSize[] = {(size_t)block_size, (size_t)block_size};

    int idx = 0;
    idx = k.set(idx, ocl::KernelArg::PtrReadOnly(uquery));
@ -306,8 +306,8 @@ static bool ocl_radiusMatchSingle(InputArray query, InputArray train,
    if (k.empty())
        return false;

-    size_t globalSize[] = {(train_rows + block_size - 1) / block_size * block_size, (query_rows + block_size - 1) / block_size * block_size};
-    size_t localSize[] = {block_size, block_size};
+    size_t globalSize[] = {((size_t)train_rows + block_size - 1) / block_size * block_size, ((size_t)query_rows + block_size - 1) / block_size * block_size};
+    size_t localSize[] = {(size_t)block_size, (size_t)block_size};

    int idx = 0;
    idx = k.set(idx, ocl::KernelArg::PtrReadOnly(uquery));
--- a/modules/features2d/src/orb.cpp
+++ b/modules/features2d/src/orb.cpp
@ -64,7 +64,7 @@ ocl_HarrisResponses(const UMat& imgbuf,
                    UMat& responses,
                    int nkeypoints, int blockSize, float harris_k)
 {
-    size_t globalSize[] = {nkeypoints};
+    size_t globalSize[] = {(size_t)nkeypoints};

    float scale = 1.f/((1 << 2) * blockSize * 255.f);
    float scale_sq_sq = scale * scale * scale * scale;
@ -86,7 +86,7 @@ ocl_ICAngles(const UMat& imgbuf, const UMat& layerinfo,
             const UMat& keypoints, UMat& responses,
             const UMat& umax, int nkeypoints, int half_k)
 {
-    size_t globalSize[] = {nkeypoints};
+    size_t globalSize[] = {(size_t)nkeypoints};

    ocl::Kernel icangle_ker("ORB_ICAngle", ocl::features2d::orb_oclsrc, "-D ORB_ANGLES");
    if( icangle_ker.empty() )
@ -106,7 +106,7 @@ ocl_computeOrbDescriptors(const UMat& imgbuf, const UMat& layerInfo,
                          const UMat& keypoints, UMat& desc, const UMat& pattern,
                          int nkeypoints, int dsize, int wta_k)
 {
-    size_t globalSize[] = {nkeypoints};
+    size_t globalSize[] = {(size_t)nkeypoints};

    ocl::Kernel desc_ker("ORB_computeDescriptor", ocl::features2d::orb_oclsrc,
                         format("-D ORB_DESCRIPTORS -D WTA_K=%d", wta_k));
--- a/modules/features2d/test/test_descriptors_regression.cpp
+++ b/modules/features2d/test/test_descriptors_regression.cpp
@ -60,7 +60,7 @@ static void writeMatInBin( const Mat& mat, const string& filename )
        fwrite( (void*)&mat.rows, sizeof(int), 1, f );
        fwrite( (void*)&mat.cols, sizeof(int), 1, f );
        fwrite( (void*)&type, sizeof(int), 1, f );
-        int dataSize = (int)(mat.step * mat.rows * mat.channels());
+        int dataSize = (int)(mat.step * mat.rows);
        fwrite( (void*)&dataSize, sizeof(int), 1, f );
        fwrite( (void*)mat.ptr(), 1, dataSize, f );
        fclose(f);
@ -82,13 +82,14 @@ static Mat readMatFromBin( const string& filename )
        int step = dataSize / rows / CV_ELEM_SIZE(type);
        CV_Assert(step >= cols);

-        Mat m = Mat(rows, step, type).colRange(0, cols);
+        Mat returnMat = Mat(rows, step, type).colRange(0, cols);

-        size_t elements_read = fread( m.ptr(), 1, dataSize, f );
+        size_t elements_read = fread( returnMat.ptr(), 1, dataSize, f );
        CV_Assert(elements_read == (size_t)(dataSize));
+
        fclose(f);

-        return m;
+        return returnMat;
    }
    return Mat();
 }
--- a/modules/features2d/test/test_nearestneighbors.cpp
+++ b/modules/features2d/test/test_nearestneighbors.cpp
@ -67,13 +67,13 @@ protected:
    virtual void run( int start_from );
    virtual void createModel( const Mat& data ) = 0;
    virtual int findNeighbors( Mat& points, Mat& neighbors ) = 0;
-    virtual int checkGetPoins( const Mat& data );
+    virtual int checkGetPoints( const Mat& data );
    virtual int checkFindBoxed();
    virtual int checkFind( const Mat& data );
    virtual void releaseModel() = 0;
 };

-int NearestNeighborTest::checkGetPoins( const Mat& )
+int NearestNeighborTest::checkGetPoints( const Mat& )
 {
   return cvtest::TS::OK;
 }
@ -127,11 +127,11 @@ int NearestNeighborTest::checkFind( const Mat& data )
 void NearestNeighborTest::run( int /*start_from*/ ) {
    int code = cvtest::TS::OK, tempCode;
    Mat desc( featuresCount, dims, CV_32FC1 );
-    randu( desc, Scalar(minValue), Scalar(maxValue) );
+    ts->get_rng().fill( desc, RNG::UNIFORM, minValue, maxValue );

    createModel( desc );

-    tempCode = checkGetPoins( desc );
+    tempCode = checkGetPoints( desc );
    if( tempCode != cvtest::TS::OK )
    {
        ts->printf( cvtest::TS::LOG, "bad accuracy of GetPoints \n" );
@ -161,7 +161,7 @@ void NearestNeighborTest::run( int /*start_from*/ ) {
 class CV_FlannTest : public NearestNeighborTest
 {
 public:
-    CV_FlannTest() {}
+    CV_FlannTest() : NearestNeighborTest(), index(NULL) { }
 protected:
    void createIndex( const Mat& data, const IndexParams& params );
    int knnSearch( Mat& points, Mat& neighbors );
@ -172,6 +172,9 @@ protected:

 void CV_FlannTest::createIndex( const Mat& data, const IndexParams& params )
 {
+    // release previously allocated index
+    releaseModel();
+
    index = new Index( data, params );
 }

@ -238,7 +241,11 @@ int CV_FlannTest::radiusSearch( Mat& points, Mat& neighbors )

 void CV_FlannTest::releaseModel()
 {
-    delete index;
+    if (index)
+    {
+        delete index;
+        index = NULL;
+    }
 }

 //---------------------------------------
--- a/modules/flann/include/opencv2/flann/autotuned_index.h
+++ b/modules/flann/include/opencv2/flann/autotuned_index.h
@ -377,6 +377,7 @@ private:
        // evaluate kdtree for all parameter combinations
        for (size_t i = 0; i < FLANN_ARRAY_LEN(testTrees); ++i) {
            CostData cost;
+            cost.params["algorithm"] = FLANN_INDEX_KDTREE;
            cost.params["trees"] = testTrees[i];

            evaluate_kdtree(cost);
--- a/modules/flann/include/opencv2/flann/kmeans_index.h
+++ b/modules/flann/include/opencv2/flann/kmeans_index.h
@ -441,6 +441,8 @@ public:
        }

        root_ = pool_.allocate<KMeansNode>();
+        std::memset(root_, 0, sizeof(KMeansNode));
+
        computeNodeStatistics(root_, indices_, (int)size_);
        computeClustering(root_, indices_, (int)size_, branching_,0);
    }
@ -864,11 +866,11 @@ private:
            variance -= distance_(centers[c], ZeroIterator<ElementType>(), veclen_);

            node->childs[c] = pool_.allocate<KMeansNode>();
+            std::memset(node->childs[c], 0, sizeof(KMeansNode));
            node->childs[c]->radius = radiuses[c];
            node->childs[c]->pivot = centers[c];
            node->childs[c]->variance = variance;
            node->childs[c]->mean_radius = mean_radius;
-            node->childs[c]->indices = NULL;
            computeClustering(node->childs[c],indices+start, end-start, branching, level+1);
            start=end;
        }
--- a/modules/hal/include/opencv2/hal.hpp
+++ b/modules/hal/include/opencv2/hal.hpp
@ -49,10 +49,21 @@

 /**
  @defgroup hal Hardware Acceleration Layer
+  @{
+    @defgroup hal_intrin Universal intrinsics
+    @{
+      @defgroup hal_intrin_impl Private implementation helpers
+    @}
+    @defgroup hal_utils Platform-dependent utils
+  @}
 */

+
 namespace cv { namespace hal {

+//! @addtogroup hal
+//! @{
+
 namespace Error {

 enum
@ -93,6 +104,8 @@ void sqrt(const double* src, double* dst, int len);
 void invSqrt(const float* src, float* dst, int len);
 void invSqrt(const double* src, double* dst, int len);

+//! @}
+
 }} //cv::hal

 #endif //__OPENCV_HAL_HPP__
--- a/modules/hal/include/opencv2/hal/defs.h
+++ b/modules/hal/include/opencv2/hal/defs.h
@ -45,6 +45,9 @@
 #ifndef __OPENCV_DEF_H__
 #define __OPENCV_DEF_H__

+//! @addtogroup hal_utils
+//! @{
+
 #if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
 #  define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
 #endif
@ -335,9 +338,6 @@ Cv64suf;
 #  include "tegra_round.hpp"
 #endif

-//! @addtogroup core_utils
-//! @{
-
 #if CV_VFP
    // 1. general scheme
    #define ARM_ROUND(_value, _asm_string) \
@ -567,15 +567,19 @@ CV_INLINE int cvIsInf( float value )
    return (ieee754.u & 0x7fffffff) == 0x7f800000;
 }

+//! @}
+
 #include <algorithm>

 namespace cv
 {

+//! @addtogroup hal_utils
+//! @{
+
 /////////////// saturate_cast (used in image & signal processing) ///////////////////

-/**
- Template function for accurate conversion from one primitive type to another.
+/** @brief Template function for accurate conversion from one primitive type to another.

 The functions saturate_cast resemble the standard C++ cast operations, such as static_cast\<T\>()
 and others. They perform an efficient and accurate conversion from one primitive type to another
@ -618,8 +622,6 @@ template<typename _Tp> static inline _Tp saturate_cast(int64 v)    { return _Tp(
 /** @overload */
 template<typename _Tp> static inline _Tp saturate_cast(uint64 v)   { return _Tp(v); }

-//! @cond IGNORED
-
 template<> inline uchar saturate_cast<uchar>(schar v)        { return (uchar)std::max((int)v, 0); }
 template<> inline uchar saturate_cast<uchar>(ushort v)       { return (uchar)std::min((unsigned)v, (unsigned)UCHAR_MAX); }
 template<> inline uchar saturate_cast<uchar>(int v)          { return (uchar)((unsigned)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
@ -664,12 +666,10 @@ template<> inline int saturate_cast<int>(double v)           { return cvRound(v)
 template<> inline unsigned saturate_cast<unsigned>(float v)  { return cvRound(v); }
 template<> inline unsigned saturate_cast<unsigned>(double v) { return cvRound(v); }

-//! @endcond
+//! @}

 }

 #endif // __cplusplus

-//! @} core_utils
-
 #endif //__OPENCV_HAL_H__
--- a/modules/hal/include/opencv2/hal/intrin.hpp
+++ b/modules/hal/include/opencv2/hal/intrin.hpp
@ -48,6 +48,7 @@
 #include <cmath>
 #include <float.h>
 #include <stdlib.h>
+#include "opencv2/hal/defs.h"

 #define OPENCV_HAL_ADD(a, b) ((a) + (b))
 #define OPENCV_HAL_AND(a, b) ((a) & (b))
@ -59,6 +60,10 @@
 // access from within opencv code more accessible
 namespace cv {

+//! @addtogroup hal_intrin
+//! @{
+
+//! @cond IGNORED
 template<typename _Tp> struct V_TypeTraits
 {
    typedef _Tp int_type;
@ -82,6 +87,7 @@ template<> struct V_TypeTraits<uchar>
    typedef int sum_type;

    typedef ushort w_type;
+    typedef unsigned q_type;

    enum { delta = 128, shift = 8 };

@ -99,6 +105,7 @@ template<> struct V_TypeTraits<schar>
    typedef int sum_type;

    typedef short w_type;
+    typedef int q_type;

    enum { delta = 128, shift = 8 };

@ -265,8 +272,22 @@ template<> struct V_TypeTraits<double>
    }
 };

+template <typename T> struct V_SIMD128Traits  // compile-time lane count for element type T
+{
+    enum { nlanes = 16 / sizeof(T) };  // how many T-sized elements fit in 16 bytes (128 bits)
+};
+
+//! @endcond
+
+//! @}
+
 }

+#ifdef CV_DOXYGEN
+#   undef CV_SSE2
+#   undef CV_NEON
+#endif
+
 #if CV_SSE2

 #include "opencv2/hal/intrin_sse.hpp"
@ -281,12 +302,19 @@ template<> struct V_TypeTraits<double>

 #endif

+//! @addtogroup hal_intrin
+//! @{
+
 #ifndef CV_SIMD128
+//! Set to 1 if current compiler supports vector extensions (NEON or SSE is enabled)
 #define CV_SIMD128 0
 #endif

 #ifndef CV_SIMD128_64F
+//! Set to 1 if current intrinsics implementation supports 64-bit float vectors
 #define CV_SIMD128_64F 0
 #endif

+//! @}
+
 #endif
--- a/modules/hal/include/opencv2/hal/intrin_cpp.hpp
+++ b/modules/hal/include/opencv2/hal/intrin_cpp.hpp
--- a/modules/hal/include/opencv2/hal/intrin_neon.hpp
+++ b/modules/hal/include/opencv2/hal/intrin_neon.hpp
@ -48,6 +48,8 @@
 namespace cv
 {

+//! @cond IGNORED
+
 #define CV_SIMD128 1

 struct v_uint8x16
@ -278,14 +280,15 @@ void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
 }

 OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_uint16x8, u16, pack, n)
-OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_int16x8, s16, pack_u, un)
 OPENCV_HAL_IMPL_NEON_PACK(v_int8x16, schar, int8x8_t, s8, v_int16x8, s16, pack, n)
 OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_uint32x4, u32, pack, n)
-OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_int32x4, s32, pack_u, un)
 OPENCV_HAL_IMPL_NEON_PACK(v_int16x8, short, int16x4_t, s16, v_int32x4, s32, pack, n)
 OPENCV_HAL_IMPL_NEON_PACK(v_uint32x4, unsigned, uint32x2_t, u32, v_uint64x2, u64, pack, n)
 OPENCV_HAL_IMPL_NEON_PACK(v_int32x4, int, int32x2_t, s32, v_int64x2, s64, pack, n)

+OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_int16x8, s16, pack_u, un)
+OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_int32x4, s32, pack_u, un)
+
 inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
                            const v_float32x4& m1, const v_float32x4& m2,
                            const v_float32x4& m3)
@ -374,7 +377,7 @@ inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
 {
    int32x4_t c = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val));
    int32x4_t d = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val));
-    int32x4x2_t cd = vtrnq_s32(c, d);
+    int32x4x2_t cd = vuzpq_s32(c, d);
    return v_int32x4(vaddq_s32(cd.val[0], cd.val[1]));
 }

@ -497,6 +500,16 @@ OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_absdiff, vabdq_u16)
 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_absdiff, vabdq_u32)
 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_absdiff, vabdq_f32)

+// Defines a binary function `func` taking two _Tpvec operands and returning a
+// _Tpvec2: `intrin` is applied to the wrapped .val members and its result is
+// passed through `cast` before being wrapped in _Tpvec2.
+#define OPENCV_HAL_IMPL_NEON_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin) \
+inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec2(cast(intrin(a.val, b.val))); \
+}
+
+OPENCV_HAL_IMPL_NEON_BIN_FUNC2(v_int8x16, v_uint8x16, vreinterpretq_u8_s8, v_absdiff, vabdq_s8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC2(v_int16x8, v_uint16x8, vreinterpretq_u16_s16, v_absdiff, vabdq_s16)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC2(v_int32x4, v_uint32x4, vreinterpretq_u32_s32, v_absdiff, vabdq_s32)
+
 inline v_float32x4 v_magnitude(const v_float32x4& a, const v_float32x4& b)
 {
    v_float32x4 x(vmlaq_f32(vmulq_f32(a.val, a.val), b.val, b.val));
@ -641,13 +654,13 @@ inline bool v_check_all(const v_float32x4& a)
 { return v_check_all(v_reinterpret_as_u32(a)); }

 inline bool v_check_any(const v_int8x16& a)
-{ return v_check_all(v_reinterpret_as_u8(a)); }
+{ return v_check_any(v_reinterpret_as_u8(a)); }
 inline bool v_check_any(const v_int16x8& a)
-{ return v_check_all(v_reinterpret_as_u16(a)); }
+{ return v_check_any(v_reinterpret_as_u16(a)); }
 inline bool v_check_any(const v_int32x4& a)
-{ return v_check_all(v_reinterpret_as_u32(a)); }
+{ return v_check_any(v_reinterpret_as_u32(a)); }
 inline bool v_check_any(const v_float32x4& a)
-{ return v_check_all(v_reinterpret_as_u32(a)); }
+{ return v_check_any(v_reinterpret_as_u32(a)); }

 #define OPENCV_HAL_IMPL_NEON_SELECT(_Tpvec, suffix, usuffix) \
 inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
@ -678,6 +691,8 @@ OPENCV_HAL_IMPL_NEON_EXPAND(v_uint8x16, v_uint16x8, uchar, u8)
 OPENCV_HAL_IMPL_NEON_EXPAND(v_int8x16, v_int16x8, schar, s8)
 OPENCV_HAL_IMPL_NEON_EXPAND(v_uint16x8, v_uint32x4, ushort, u16)
 OPENCV_HAL_IMPL_NEON_EXPAND(v_int16x8, v_int32x4, short, s16)
+OPENCV_HAL_IMPL_NEON_EXPAND(v_uint32x4, v_uint64x2, uint, u32)
+OPENCV_HAL_IMPL_NEON_EXPAND(v_int32x4, v_int64x2, int, s32)

 inline v_uint32x4 v_load_expand_q(const uchar* ptr)
 {
@ -840,6 +855,8 @@ inline v_float32x4 v_cvt_f32(const v_int32x4& a)
    return v_float32x4(vcvtq_f32_s32(a.val));
 }

+//! @endcond
+
 }

 #endif
--- a/modules/hal/include/opencv2/hal/intrin_sse.hpp
+++ b/modules/hal/include/opencv2/hal/intrin_sse.hpp
@ -51,6 +51,8 @@
 namespace cv
 {

+//! @cond IGNORED
+
 struct v_uint8x16
 {
    typedef uchar lane_type;
@ -296,6 +298,11 @@ OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_int32x4, s32)
 OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_uint64x2, u64)
 OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_int64x2, s64)

+// Float <-> float reinterpret overloads: the same-type versions return their
+// argument unchanged, the cross-type versions cast the register with
+// _mm_castpd_ps / _mm_castps_pd.
+inline v_float32x4 v_reinterpret_as_f32(const v_float32x4& a) {return a; }
+inline v_float64x2 v_reinterpret_as_f64(const v_float64x2& a) {return a; }
+inline v_float32x4 v_reinterpret_as_f32(const v_float64x2& a) {return v_float32x4(_mm_castpd_ps(a.val)); }
+inline v_float64x2 v_reinterpret_as_f64(const v_float32x4& a) {return v_float64x2(_mm_castps_pd(a.val)); }
+
 //////////////// PACK ///////////////
 inline v_uint8x16 v_pack(const v_uint16x8& a, const v_uint16x8& b)
 {
@ -430,6 +437,17 @@ inline void v_pack_u_store(ushort* ptr, const v_int32x4& a)
    _mm_storel_epi64((__m128i*)ptr, r);
 }

+// Rounding right shift by n of two int32 vectors, packed into one uint16 vector
+// (lanes of `a` in the low half, lanes of `b` in the high half).
+// Each lane is computed as (x + (1 << (n-1))) >> n with an arithmetic shift,
+// then biased by -32768 before _mm_packs_epi32 and un-biased afterwards by
+// subtracting -32768 in 16-bit arithmetic.
+// NOTE(review): the bias appears to be there because _mm_packs_epi32 saturates
+// to the signed 16-bit range - confirm against the intrinsic reference.
+template<int n> inline
+v_uint16x8 v_rshr_pack_u(const v_int32x4& a, const v_int32x4& b)
+{
+    __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768);
+    __m128i a1 = _mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n), delta32);
+    __m128i a2 = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768));
+    __m128i b1 = _mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(b.val, delta), n), delta32);
+    __m128i b2 = _mm_sub_epi16(_mm_packs_epi32(b1, b1), _mm_set1_epi16(-32768));
+    // keep the low 64 bits of each packed result: a-lanes first, then b-lanes
+    return v_uint16x8(_mm_unpacklo_epi64(a2, b2));
+}
+
 template<int n> inline
 void v_rshr_pack_u_store(ushort* ptr, const v_int32x4& a)
 {
@ -460,7 +478,7 @@ void v_rshr_pack_store(short* ptr, const v_int32x4& a)
 {
    __m128i delta = _mm_set1_epi32(1 << (n-1));
    __m128i a1 = _mm_srai_epi32(_mm_add_epi32(a.val, delta), n);
-    _mm_storel_epi64((__m128i*)ptr, a1);
+    _mm_storel_epi64((__m128i*)ptr, _mm_packs_epi32(a1, a1));
 }


@ -469,7 +487,7 @@ inline v_uint32x4 v_pack(const v_uint64x2& a, const v_uint64x2& b)
 {
    __m128i v0 = _mm_unpacklo_epi32(a.val, b.val); // a0 a1 0 0
    __m128i v1 = _mm_unpackhi_epi32(a.val, b.val); // b0 b1 0 0
-    return v_uint32x4(_mm_unpacklo_epi64(v0, v1));
+    return v_uint32x4(_mm_unpacklo_epi32(v0, v1));
 }

 inline void v_pack_store(unsigned* ptr, const v_uint64x2& a)
@ -483,7 +501,7 @@ inline v_int32x4 v_pack(const v_int64x2& a, const v_int64x2& b)
 {
    __m128i v0 = _mm_unpacklo_epi32(a.val, b.val); // a0 a1 0 0
    __m128i v1 = _mm_unpackhi_epi32(a.val, b.val); // b0 b1 0 0
-    return v_int32x4(_mm_unpacklo_epi64(v0, v1));
+    return v_int32x4(_mm_unpacklo_epi32(v0, v1));
 }

 inline void v_pack_store(int* ptr, const v_int64x2& a)
@ -501,7 +519,7 @@ v_uint32x4 v_rshr_pack(const v_uint64x2& a, const v_uint64x2& b)
    __m128i b1 = _mm_srli_epi64(_mm_add_epi64(b.val, delta2.val), n);
    __m128i v0 = _mm_unpacklo_epi32(a1, b1); // a0 a1 0 0
    __m128i v1 = _mm_unpackhi_epi32(a1, b1); // b0 b1 0 0
-    return v_uint32x4(_mm_unpacklo_epi64(v0, v1));
+    return v_uint32x4(_mm_unpacklo_epi32(v0, v1));
 }

 template<int n> inline
@ -534,7 +552,7 @@ v_int32x4 v_rshr_pack(const v_int64x2& a, const v_int64x2& b)
    __m128i b1 = v_srai_epi64(_mm_add_epi64(b.val, delta2.val), n);
    __m128i v0 = _mm_unpacklo_epi32(a1, b1); // a0 a1 0 0
    __m128i v1 = _mm_unpackhi_epi32(a1, b1); // b0 b1 0 0
-    return v_int32x4(_mm_unpacklo_epi64(v0, v1));
+    return v_int32x4(_mm_unpacklo_epi32(v0, v1));
 }

 template<int n> inline
@ -630,8 +648,8 @@ inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b,
 {
    __m128i v0 = _mm_mullo_epi16(a.val, b.val);
    __m128i v1 = _mm_mulhi_epi16(a.val, b.val);
-    c.val = _mm_unpacklo_epi32(v0, v1);
-    d.val = _mm_unpackhi_epi32(v0, v1);
+    c.val = _mm_unpacklo_epi16(v0, v1);
+    d.val = _mm_unpackhi_epi16(v0, v1);
 }

 inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b,
@ -639,8 +657,8 @@ inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b,
 {
    __m128i v0 = _mm_mullo_epi16(a.val, b.val);
    __m128i v1 = _mm_mulhi_epu16(a.val, b.val);
-    c.val = _mm_unpacklo_epi32(v0, v1);
-    d.val = _mm_unpackhi_epi32(v0, v1);
+    c.val = _mm_unpacklo_epi16(v0, v1);
+    d.val = _mm_unpackhi_epi16(v0, v1);
 }

 inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
@ -869,6 +887,18 @@ inline _Tpuvec v_absdiff(const _Tpsvec& a, const _Tpsvec& b) \
 OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16(v_uint8x16, v_int8x16, 8, (int)0x80808080)
 OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16(v_uint16x8, v_int16x8, 16, (int)0x80008000)

+// Absolute difference of unsigned 32-bit lanes, computed as max(a, b) - min(a, b).
+inline v_uint32x4 v_absdiff(const v_uint32x4& a, const v_uint32x4& b)
+{
+    const v_uint32x4 larger = v_max(a, b);
+    const v_uint32x4 smaller = v_min(a, b);
+    return larger - smaller;
+}
+
+// Absolute difference of signed 32-bit lanes, returned as an unsigned vector.
+inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b)
+{
+    // d = a - b in 32-bit lane arithmetic
+    __m128i d = _mm_sub_epi32(a.val, b.val);
+    // m = comparison mask for the lanes where b > a
+    __m128i m = _mm_cmpgt_epi32(b.val, a.val);
+    // (d ^ m) - m: with an all-ones m this is the two's complement negation of d,
+    // with a zero m it leaves d unchanged
+    return v_uint32x4(_mm_sub_epi32(_mm_xor_si128(d, m), m));
+}
+
 #define OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(_Tpvec, _Tp, _Tpreg, suffix, absmask_vec) \
 inline _Tpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
 { \
@ -1047,8 +1077,8 @@ OPENCV_HAL_IMPL_SSE_SELECT(v_uint16x8, si128)
 OPENCV_HAL_IMPL_SSE_SELECT(v_int16x8, si128)
 OPENCV_HAL_IMPL_SSE_SELECT(v_uint32x4, si128)
 OPENCV_HAL_IMPL_SSE_SELECT(v_int32x4, si128)
-OPENCV_HAL_IMPL_SSE_SELECT(v_uint64x2, si128)
-OPENCV_HAL_IMPL_SSE_SELECT(v_int64x2, si128)
+// OPENCV_HAL_IMPL_SSE_SELECT(v_uint64x2, si128)
+// OPENCV_HAL_IMPL_SSE_SELECT(v_int64x2, si128)
 OPENCV_HAL_IMPL_SSE_SELECT(v_float32x4, ps)
 OPENCV_HAL_IMPL_SSE_SELECT(v_float64x2, pd)

@ -1257,7 +1287,7 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b,
    __m128i v0 = _mm_unpacklo_epi8(u0, u2); // a0 a8 b0 b8 ...
    __m128i v1 = _mm_unpackhi_epi8(u0, u2); // a2 a10 b2 b10 ...
    __m128i v2 = _mm_unpacklo_epi8(u1, u3); // a4 a12 b4 b12 ...
-    __m128i v3 = _mm_unpackhi_epi8(u1, u3); // a6 a14 b4 b14 ...
+    __m128i v3 = _mm_unpackhi_epi8(u1, u3); // a6 a14 b6 b14 ...

    u0 = _mm_unpacklo_epi8(v0, v2); // a0 a4 a8 a12 ...
    u1 = _mm_unpacklo_epi8(v1, v3); // a2 a6 a10 a14 ...
@ -1266,13 +1296,13 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b,

    v0 = _mm_unpacklo_epi8(u0, u1); // a0 a2 a4 a6 ...
    v1 = _mm_unpacklo_epi8(u2, u3); // a1 a3 a5 a7 ...
-    v2 = _mm_unpackhi_epi8(u0, u1); // b0 b2 b4 b6 ...
-    v3 = _mm_unpackhi_epi8(u2, u3); // b1 b3 b5 b7 ...
+    v2 = _mm_unpackhi_epi8(u0, u1); // c0 c2 c4 c6 ...
+    v3 = _mm_unpackhi_epi8(u2, u3); // c1 c3 c5 c7 ...

    a.val = _mm_unpacklo_epi8(v0, v1);
-    b.val = _mm_unpacklo_epi8(v2, v3);
-    c.val = _mm_unpackhi_epi8(v0, v1);
-    d.val = _mm_unpacklo_epi8(v2, v3);
+    b.val = _mm_unpackhi_epi8(v0, v1);
+    c.val = _mm_unpacklo_epi8(v2, v3);
+    d.val = _mm_unpackhi_epi8(v2, v3);
 }

 inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c)
@ -1560,6 +1590,8 @@ inline v_float64x2 v_cvt_f64(const v_float32x4& a)
    return v_float64x2(_mm_cvtps_pd(a.val));
 }

+//! @endcond
+
 }

 #endif
--- a/modules/hal/test/test_intrin.cpp
+++ b/modules/hal/test/test_intrin.cpp
@ -0,0 +1,864 @@
+#include "test_intrin_utils.hpp"
+#include <climits>
+
+using namespace cv;
+
+template<typename R> struct TheTest
+{
+    typedef typename R::lane_type LaneType;
+
+    // Exercises construction, load/store and reinterpret helpers for register type R.
+    // Works on AlignedData<R> buffers (one member expected to be 16-byte aligned,
+    // the other expected not to be), then checks low/high/halves stores and loads,
+    // the zero()/all() factories and a round trip through every v_reinterpret_as_*.
+    // Returns *this so that test calls can be chained.
+    TheTest & test_loadstore()
+    {
+        AlignedData<R> data;
+        AlignedData<R> out;
+
+        // check if addresses are aligned and unaligned respectively
+        EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16);
+        EXPECT_NE((size_t)0, (size_t)&data.u.d % 16);
+        EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16);
+        EXPECT_NE((size_t)0, (size_t)&out.u.d % 16);
+
+        // check some initialization methods
+        R r1 = data.a;
+        R r2 = v_load(data.u.d);
+        R r3 = v_load_aligned(data.a.d);
+        R r4(r2);
+        EXPECT_EQ(data.a[0], r1.get0());
+        EXPECT_EQ(data.u[0], r2.get0());
+        EXPECT_EQ(data.a[0], r3.get0());
+        EXPECT_EQ(data.u[0], r4.get0());
+
+        // check some store methods
+        // r1 was read from data.a and r2 from data.u, yet each is stored to the
+        // other kind of buffer; the comparisons presume data.a and data.u hold
+        // the same values - TODO confirm in AlignedData
+        out.u.clear();
+        out.a.clear();
+        v_store(out.u.d, r1);
+        v_store_aligned(out.a.d, r2);
+        EXPECT_EQ(data.a, out.a);
+        EXPECT_EQ(data.u, out.u);
+
+        // check more store methods
+        // high half goes to res.mid(), low half to the start of res
+        Data<R> d, res(0);
+        R r5 = d;
+        v_store_high(res.mid(), r5);
+        v_store_low(res.d, r5);
+        EXPECT_EQ(d, res);
+
+        // check halves load correctness
+        res.clear();
+        R r6 = v_load_halves(d.d, d.mid());
+        v_store(res.d, r6);
+        EXPECT_EQ(d, res);
+
+        // zero, all
+        Data<R> resZ = RegTrait<R>::zero();
+        Data<R> resV = RegTrait<R>::all(8);
+        for (int i = 0; i < R::nlanes; ++i)
+        {
+            EXPECT_EQ((LaneType)0, resZ[i]);
+            EXPECT_EQ((LaneType)8, resV[i]);
+        }
+
+        // reinterpret_as
+        // each line reinterprets r1 as another register type, stores it back through
+        // a pointer cast into the cleared out.a buffer and expects the original contents
+        v_uint8x16 vu8 = v_reinterpret_as_u8(r1); out.a.clear(); v_store((uchar*)out.a.d, vu8); EXPECT_EQ(data.a, out.a);
+        v_int8x16 vs8 = v_reinterpret_as_s8(r1); out.a.clear(); v_store((schar*)out.a.d, vs8); EXPECT_EQ(data.a, out.a);
+        v_uint16x8 vu16 = v_reinterpret_as_u16(r1); out.a.clear(); v_store((ushort*)out.a.d, vu16); EXPECT_EQ(data.a, out.a);
+        v_int16x8 vs16 = v_reinterpret_as_s16(r1); out.a.clear(); v_store((short*)out.a.d, vs16); EXPECT_EQ(data.a, out.a);
+        v_uint32x4 vu32 = v_reinterpret_as_u32(r1); out.a.clear(); v_store((unsigned*)out.a.d, vu32); EXPECT_EQ(data.a, out.a);
+        v_int32x4 vs32 = v_reinterpret_as_s32(r1); out.a.clear(); v_store((int*)out.a.d, vs32); EXPECT_EQ(data.a, out.a);
+        v_uint64x2 vu64 = v_reinterpret_as_u64(r1); out.a.clear(); v_store((uint64*)out.a.d, vu64); EXPECT_EQ(data.a, out.a);
+        v_int64x2 vs64 = v_reinterpret_as_s64(r1); out.a.clear(); v_store((int64*)out.a.d, vs64); EXPECT_EQ(data.a, out.a);
+        v_float32x4 vf32 = v_reinterpret_as_f32(r1); out.a.clear(); v_store((float*)out.a.d, vf32); EXPECT_EQ(data.a, out.a);
+#if CV_SIMD128_64F
+        v_float64x2 vf64 = v_reinterpret_as_f64(r1); out.a.clear(); v_store((double*)out.a.d, vf64); EXPECT_EQ(data.a, out.a);
+#endif
+
+        return *this;
+    }
+
+    // Round-trip check for v_store_interleave / v_load_deinterleave with 3 and 4
+    // channels: registers are written interleaved into a plain buffer, overwritten
+    // with Data<R>(0), read back de-interleaved and compared with the source data.
+    // Returns *this so that test calls can be chained.
+    TheTest & test_interleave()
+    {
+        Data<R> data1, data2, data3, data4;
+        data2 += 20;
+        data3 += 40;
+        data4 += 60;
+
+        R a = data1, b = data2, c = data3;
+        R d = data1, e = data2, f = data3, g = data4;
+
+        LaneType buf3[R::nlanes * 3];
+        LaneType buf4[R::nlanes * 4];
+
+        v_store_interleave(buf3, a, b, c);
+        v_store_interleave(buf4, d, e, f, g);
+
+        // overwrite the registers so that stale values cannot satisfy the checks
+        Data<R> z(0);
+        a = b = c = d = e = f = g = z;
+
+        v_load_deinterleave(buf3, a, b, c);
+        v_load_deinterleave(buf4, d, e, f, g);
+
+        // Data<R> equality compares whole vectors, so one comparison per register
+        // is enough; repeating it per lane only duplicated failure messages.
+        EXPECT_EQ(data1, Data<R>(a));
+        EXPECT_EQ(data2, Data<R>(b));
+        EXPECT_EQ(data3, Data<R>(c));
+
+        EXPECT_EQ(data1, Data<R>(d));
+        EXPECT_EQ(data2, Data<R>(e));
+        EXPECT_EQ(data3, Data<R>(f));
+        EXPECT_EQ(data4, Data<R>(g));
+
+        return *this;
+    }
+
+    // v_expand and v_load_expand
+    TheTest & test_expand()
+    {
+        typedef typename RegTrait<R>::w_reg Rx2;
+        Data<R> src;
+        R reg = src;
+
+        // widen directly from memory
+        Data<Rx2> loaded = v_load_expand(src.d);
+
+        // widen a register into its low and high halves
+        Rx2 lo, hi;
+        v_expand(reg, lo, hi);
+        Data<Rx2> widenedLo = lo, widenedHi = hi;
+
+        const int half = Rx2::nlanes;
+        for (int k = 0; k < half; ++k)
+        {
+            EXPECT_EQ(src[k], loaded[k]);
+            EXPECT_EQ(src[k], widenedLo[k]);
+            EXPECT_EQ(src[k + half], widenedHi[k]);
+        }
+
+        return *this;
+    }
+
+    // v_load_expand_q: the first Rx4::nlanes source lanes must survive widening.
+    TheTest & test_expand_q()
+    {
+        typedef typename RegTrait<R>::q_reg Rx4;
+        Data<R> src;
+        Data<Rx4> widened = v_load_expand_q(src.d);
+        const int quarter = Rx4::nlanes;
+        for (int k = 0; k < quarter; ++k)
+        {
+            EXPECT_EQ(src[k], widened[k]);
+        }
+
+        return *this;
+    }
+
+    // operator+ and operator- are checked against saturate_cast of the scalar result.
+    TheTest & test_addsub()
+    {
+        Data<R> lhs, rhs;
+        rhs.reverse();
+        R a = lhs, b = rhs;
+
+        Data<R> sum = a + b;
+        Data<R> diff = a - b;
+        for (int k = 0; k < R::nlanes; ++k)
+        {
+            EXPECT_EQ(saturate_cast<LaneType>(lhs[k] + rhs[k]), sum[k]);
+            EXPECT_EQ(saturate_cast<LaneType>(lhs[k] - rhs[k]), diff[k]);
+        }
+
+        return *this;
+    }
+
+    // v_add_wrap / v_sub_wrap are checked against a plain cast of the scalar result.
+    TheTest & test_addsub_wrap()
+    {
+        Data<R> lhs, rhs;
+        rhs.reverse();
+        R a = lhs, b = rhs;
+
+        Data<R> sum = v_add_wrap(a, b);
+        Data<R> diff = v_sub_wrap(a, b);
+        for (int k = 0; k < R::nlanes; ++k)
+        {
+            EXPECT_EQ((LaneType)(lhs[k] + rhs[k]), sum[k]);
+            EXPECT_EQ((LaneType)(lhs[k] - rhs[k]), diff[k]);
+        }
+        return *this;
+    }
+
+    // Lane-wise operator* against the scalar product.
+    TheTest & test_mul()
+    {
+        Data<R> lhs, rhs;
+        rhs.reverse();
+        R a = lhs, b = rhs;
+
+        Data<R> product = a * b;
+        for (int k = 0; k < R::nlanes; ++k)
+            EXPECT_EQ(lhs[k] * rhs[k], product[k]);
+
+        return *this;
+    }
+
+    // Lane-wise operator/ against the scalar quotient.
+    TheTest & test_div()
+    {
+        Data<R> lhs, rhs;
+        rhs.reverse();
+        R a = lhs, b = rhs;
+
+        Data<R> quotient = a / b;
+        for (int k = 0; k < R::nlanes; ++k)
+            EXPECT_EQ(lhs[k] / rhs[k], quotient[k]);
+
+        return *this;
+    }
+
+    // v_mul_expand: products are delivered in the wider register type, the first
+    // half of the lanes in one output and the second half in the other.
+    TheTest & test_mul_expand()
+    {
+        typedef typename RegTrait<R>::w_reg Rx2;
+        Data<R> lhs, rhs(2);
+        R a = lhs, b = rhs;
+
+        Rx2 lo, hi;
+        v_mul_expand(a, b, lo, hi);
+        Data<Rx2> prodLo = lo, prodHi = hi;
+
+        const int half = R::nlanes / 2;
+        for (int k = 0; k < half; ++k)
+        {
+            EXPECT_EQ((typename Rx2::lane_type)lhs[k] * rhs[k], prodLo[k]);
+            EXPECT_EQ((typename Rx2::lane_type)lhs[k + half] * rhs[k + half], prodHi[k]);
+        }
+
+        return *this;
+    }
+
+    // Shift by the compile-time amount s, through both the operators and the
+    // v_shl<s> / v_shr<s> templates.
+    template <int s>
+    TheTest & test_shift()
+    {
+        Data<R> src;
+        R a = src;
+
+        Data<R> shlOp = a << s;
+        Data<R> shlFn = v_shl<s>(a);
+        Data<R> shrOp = a >> s;
+        Data<R> shrFn = v_shr<s>(a);
+        for (int k = 0; k < R::nlanes; ++k)
+        {
+            EXPECT_EQ(src[k] << s, shlOp[k]);
+            EXPECT_EQ(src[k] << s, shlFn[k]);
+            EXPECT_EQ(src[k] >> s, shrOp[k]);
+            EXPECT_EQ(src[k] >> s, shrFn[k]);
+        }
+        return *this;
+    }
+
+    // All six comparison operators: a lane counts as "true" when it is non-zero.
+    TheTest & test_cmp()
+    {
+        Data<R> lhs, rhs;
+        rhs.reverse();
+        rhs += 1;
+        R a = lhs, b = rhs;
+
+        Data<R> eq = (a == b);
+        Data<R> ne = (a != b);
+        Data<R> gt = (a > b);
+        Data<R> ge = (a >= b);
+        Data<R> lt = (a < b);
+        Data<R> le = (a <= b);
+
+        for (int k = 0; k < R::nlanes; ++k)
+        {
+            EXPECT_EQ(lhs[k] == rhs[k], eq[k] != 0);
+            EXPECT_EQ(lhs[k] != rhs[k], ne[k] != 0);
+            EXPECT_EQ(lhs[k] >  rhs[k], gt[k] != 0);
+            EXPECT_EQ(lhs[k] >= rhs[k], ge[k] != 0);
+            EXPECT_EQ(lhs[k] <  rhs[k], lt[k] != 0);
+            EXPECT_EQ(lhs[k] <= rhs[k], le[k] != 0);
+        }
+        return *this;
+    }
+
+    // v_dotprod: each output lane is the sum of the products of one adjacent lane pair.
+    TheTest & test_dot_prod()
+    {
+        typedef typename RegTrait<R>::w_reg Rx2;
+        Data<R> lhs, rhs(2);
+        R a = lhs, b = rhs;
+
+        Data<Rx2> dot = v_dotprod(a, b);
+
+        const int pairs = R::nlanes / 2;
+        for (int k = 0; k < pairs; ++k)
+            EXPECT_EQ(lhs[k*2] * rhs[k*2] + lhs[k*2 + 1] * rhs[k*2 + 1], dot[k]);
+        return *this;
+    }
+
+    // Bitwise &, |, ^ and ~ against the scalar operators.
+    TheTest & test_logic()
+    {
+        Data<R> lhs, rhs(2);
+        R a = lhs, b = rhs;
+
+        Data<R> andRes = a & b;
+        Data<R> orRes = a | b;
+        Data<R> xorRes = a ^ b;
+        Data<R> notRes = ~a;
+        for (int k = 0; k < R::nlanes; ++k)
+        {
+            EXPECT_EQ(lhs[k] & rhs[k], andRes[k]);
+            EXPECT_EQ(lhs[k] | rhs[k], orRes[k]);
+            EXPECT_EQ(lhs[k] ^ rhs[k], xorRes[k]);
+            EXPECT_EQ((LaneType)~lhs[k], notRes[k]);
+        }
+
+        return *this;
+    }
+
+    // Checks v_sqrt and v_invsqrt on dataA and v_abs on dataD (dataD is a
+    // default-constructed Data<R> multiplied by -1.0).
+    // Results are compared as floats with EXPECT_FLOAT_EQ.
+    // Returns *this so that test calls can be chained.
+    TheTest & test_sqrt_abs()
+    {
+        Data<R> dataA, dataD;
+        dataD *= -1.0;
+        R a = dataA, d = dataD;
+
+        Data<R> resB = v_sqrt(a), resC = v_invsqrt(a), resE = v_abs(d);
+        for (int i = 0; i < R::nlanes; ++i)
+        {
+            EXPECT_FLOAT_EQ((float)std::sqrt(dataA[i]), (float)resB[i]);
+            EXPECT_FLOAT_EQ(1/(float)std::sqrt(dataA[i]), (float)resC[i]);
+            // Reference is taken from the actual v_abs input, and std::abs is
+            // spelled out so the floating-point overload is used rather than
+            // C's int abs(int).
+            EXPECT_FLOAT_EQ((float)std::abs(dataD[i]), (float)resE[i]);
+        }
+
+        return *this;
+    }
+
+    // v_min / v_max against std::min / std::max per lane.
+    TheTest & test_min_max()
+    {
+        Data<R> lhs, rhs;
+        rhs.reverse();
+        R a = lhs, b = rhs;
+
+        Data<R> lower = v_min(a, b);
+        Data<R> upper = v_max(a, b);
+        for (int k = 0; k < R::nlanes; ++k)
+        {
+            EXPECT_EQ(std::min(lhs[k], rhs[k]), lower[k]);
+            EXPECT_EQ(std::max(lhs[k], rhs[k]), upper[k]);
+        }
+
+        return *this;
+    }
+
+    // Checks v_absdiff for integer register types; the result is read back in the
+    // unsigned register type RegTrait<R>::u_reg.
+    // Inputs are max() vs. min() of the lane type in every lane, except lanes 0 and 1
+    // which hold -1 vs. 1 and 2 vs. -2 (cast to LaneType).
+    // The reference XORs both operands with the sign bit when LaneType is signed,
+    // so that an unsigned comparison and subtraction give the expected magnitude.
+    // Returns *this so that test calls can be chained.
+    TheTest & test_absdiff()
+    {
+        typedef typename RegTrait<R>::u_reg Ru;
+        typedef typename Ru::lane_type u_type;
+        Data<R> dataA(std::numeric_limits<LaneType>::max()),
+                dataB(std::numeric_limits<LaneType>::min());
+        dataA[0] = (LaneType)-1;
+        dataB[0] = 1;
+        dataA[1] = 2;
+        dataB[1] = (LaneType)-2;
+        R a = dataA, b = dataB;
+        Data<Ru> resC = v_absdiff(a, b);
+        // Build the sign bit in u_type arithmetic: shifting a plain int 1 left by
+        // 31 (or more, for wider lanes) overflows int.
+        const u_type signBit = (u_type)((u_type)1 << (sizeof(u_type)*8 - 1));
+        const u_type mask = std::numeric_limits<LaneType>::is_signed ? signBit : 0;
+        for (int i = 0; i < Ru::nlanes; ++i)
+        {
+            u_type uA = dataA[i] ^ mask;
+            u_type uB = dataB[i] ^ mask;
+            EXPECT_EQ(uA > uB ? uA - uB : uB - uA, resC[i]);
+        }
+        return *this;
+    }
+
+    // v_absdiff for floating-point registers: numeric_limits max() vs. min() in
+    // every lane, except lanes 0 and 1 which hold -1 vs. 1 and 2 vs. -2.
+    TheTest & test_float_absdiff()
+    {
+        Data<R> lhs(std::numeric_limits<LaneType>::max());
+        Data<R> rhs(std::numeric_limits<LaneType>::min());
+        lhs[0] = -1;
+        rhs[0] = 1;
+        lhs[1] = 2;
+        rhs[1] = -2;
+        R a = lhs, b = rhs;
+        Data<R> diff = v_absdiff(a, b);
+        for (int k = 0; k < R::nlanes; ++k)
+            EXPECT_EQ(lhs[k] > rhs[k] ? lhs[k] - rhs[k] : rhs[k] - lhs[k], diff[k]);
+        return *this;
+    }
+
+    // Checks v_reduce_min / v_reduce_max / v_reduce_sum on a default-constructed
+    // Data<R>. The expectations presume the lanes hold 1..R::nlanes (min is 1,
+    // max is R::nlanes) - TODO confirm against the Data<R> default constructor.
+    // Returns *this so that test calls can be chained.
+    TheTest & test_reduce()
+    {
+        Data<R> dataA;
+        R a = dataA;
+        EXPECT_EQ((LaneType)1, v_reduce_min(a));
+        EXPECT_EQ((LaneType)R::nlanes, v_reduce_max(a));
+        // Sum of 1..nlanes is nlanes*(nlanes+1)/2; the earlier (1 + nlanes)*2
+        // matched it only for four lanes.
+        EXPECT_EQ((LaneType)((1 + R::nlanes) * R::nlanes / 2), v_reduce_sum(a));
+        return *this;
+    }
+
+    // v_signmask, v_check_all, v_check_any and v_select.
+    // Lane 1 of the first input is negated, the third input is negated entirely,
+    // the second input is left as default-constructed.
+    TheTest & test_mask()
+    {
+        Data<R> oneNeg, noneNeg, allNeg, pickD(1), pickE(2);
+        oneNeg[1] *= (LaneType)-1;
+        allNeg *= (LaneType)-1;
+        R a = oneNeg, b = noneNeg, c = allNeg, d = pickD, e = pickE;
+
+        // only lane 1 is negative, so only bit 1 is expected in the sign mask
+        int signBits = v_signmask(a);
+        EXPECT_EQ(2, signBits);
+
+        EXPECT_EQ(false, v_check_all(a));
+        EXPECT_EQ(false, v_check_all(b));
+        EXPECT_EQ(true, v_check_all(c));
+
+        EXPECT_EQ(true, v_check_any(a));
+        EXPECT_EQ(false, v_check_any(b));
+        EXPECT_EQ(true, v_check_any(c));
+
+        typedef V_TypeTraits<LaneType> Traits;
+        typedef typename Traits::int_type int_type;
+
+        // v_select is compared with a bitwise blend done on the integer images
+        R selected = v_select(b, d, e);
+        Data<R> resSel = selected;
+        for (int k = 0; k < R::nlanes; ++k)
+        {
+            int_type bits = Traits::reinterpret_int(noneNeg[k]);
+            EXPECT_EQ((Traits::reinterpret_int(pickD[k]) & bits)
+                    | (Traits::reinterpret_int(pickE[k]) & ~bits),
+                      Traits::reinterpret_int(resSel[k]));
+        }
+
+        return *this;
+    }
+
+    // v_pack / v_rshr_pack<s> and their *_store variants, narrowing from the wide
+    // register type Rx2 to R. The store variants write only the first half of the
+    // destination, which is expected to leave the second half at 0.
+    template <int s>
+    TheTest & test_pack()
+    {
+        typedef typename RegTrait<R>::w_reg Rx2;
+        typedef typename Rx2::lane_type w_type;
+        Data<Rx2> wideA, wideB;
+        wideA += std::numeric_limits<LaneType>::is_signed ? -10 : 10;
+        wideB *= 10;
+        Rx2 a = wideA, b = wideB;
+
+        Data<R> packed = v_pack(a, b);
+        Data<R> packedShr = v_rshr_pack<s>(a, b);
+
+        Data<R> stored(0);
+        v_pack_store(stored.d, b);
+
+        Data<R> storedShr(0);
+        v_rshr_pack_store<s>(storedShr.d, b);
+
+        const int half = Rx2::nlanes;
+        const w_type rounding = (w_type)1 << (s - 1);
+        for (int k = 0; k < half; ++k)
+        {
+            EXPECT_EQ(saturate_cast<LaneType>(wideA[k]), packed[k]);
+            EXPECT_EQ(saturate_cast<LaneType>(wideB[k]), packed[k + half]);
+            EXPECT_EQ(saturate_cast<LaneType>((wideA[k] + rounding) >> s), packedShr[k]);
+            EXPECT_EQ(saturate_cast<LaneType>((wideB[k] + rounding) >> s), packedShr[k + half]);
+            EXPECT_EQ(saturate_cast<LaneType>(wideB[k]), stored[k]);
+            EXPECT_EQ((LaneType)0, stored[k + half]);
+            EXPECT_EQ(saturate_cast<LaneType>((wideB[k] + rounding) >> s), storedShr[k]);
+            EXPECT_EQ((LaneType)0, storedShr[k + half]);
+        }
+        return *this;
+    }
+
+    // v_pack_u / v_rshr_pack_u<s> and their *_store variants: the source is the
+    // int_reg counterpart of the wide register type, the destination is R.
+    template <int s>
+    TheTest & test_pack_u()
+    {
+        typedef typename RegTrait<R>::w_reg Rx2;
+        typedef typename RegTrait<Rx2>::int_reg Ri2;
+        typedef typename Ri2::lane_type w_type;
+
+        Data<Ri2> wideA, wideB;
+        wideA += -10;
+        wideB *= 10;
+        Ri2 a = wideA, b = wideB;
+
+        Data<R> packed = v_pack_u(a, b);
+        Data<R> packedShr = v_rshr_pack_u<s>(a, b);
+
+        Data<R> stored(0);
+        v_pack_u_store(stored.d, b);
+
+        Data<R> storedShr(0);
+        v_rshr_pack_u_store<s>(storedShr.d, b);
+
+        const int half = Ri2::nlanes;
+        const w_type rounding = (w_type)1 << (s - 1);
+        for (int k = 0; k < half; ++k)
+        {
+            EXPECT_EQ(saturate_cast<LaneType>(wideA[k]), packed[k]);
+            EXPECT_EQ(saturate_cast<LaneType>(wideB[k]), packed[k + half]);
+            EXPECT_EQ(saturate_cast<LaneType>((wideA[k] + rounding) >> s), packedShr[k]);
+            EXPECT_EQ(saturate_cast<LaneType>((wideB[k] + rounding) >> s), packedShr[k + half]);
+            EXPECT_EQ(saturate_cast<LaneType>(wideB[k]), stored[k]);
+            EXPECT_EQ((LaneType)0, stored[k + half]);
+            EXPECT_EQ(saturate_cast<LaneType>((wideB[k] + rounding) >> s), storedShr[k]);
+            EXPECT_EQ((LaneType)0, storedShr[k + half]);
+        }
+        return *this;
+    }
+
+    // v_zip, v_recombine, v_combine_low and v_combine_high.
+    TheTest & test_unpack()
+    {
+        Data<R> lhs, rhs;
+        rhs *= 10;
+        R a = lhs, b = rhs;
+
+        R zipLo, zipHi, recLo, recHi, lo, hi;
+        v_zip(a, b, zipLo, zipHi);
+        v_recombine(a, b, recLo, recHi);
+        lo = v_combine_low(a, b);
+        hi = v_combine_high(a, b);
+
+        Data<R> resZipLo = zipLo, resZipHi = zipHi;
+        Data<R> resRecLo = recLo, resRecHi = recHi;
+        Data<R> resLo = lo, resHi = hi;
+
+        const int half = R::nlanes/2;
+        for (int k = 0; k < half; ++k)
+        {
+            // zip: lanes of a and b alternate
+            EXPECT_EQ(lhs[k], resZipLo[k*2]);
+            EXPECT_EQ(rhs[k], resZipLo[k*2+1]);
+            EXPECT_EQ(lhs[k+half], resZipHi[k*2]);
+            EXPECT_EQ(rhs[k+half], resZipHi[k*2+1]);
+
+            // recombine: low halves together, high halves together
+            EXPECT_EQ(lhs[k], resRecLo[k]);
+            EXPECT_EQ(rhs[k], resRecLo[k+half]);
+            EXPECT_EQ(lhs[k+half], resRecHi[k]);
+            EXPECT_EQ(rhs[k+half], resRecHi[k+half]);
+
+            // combine_low / combine_high: same layout as recombine
+            EXPECT_EQ(lhs[k], resLo[k]);
+            EXPECT_EQ(rhs[k], resLo[k+half]);
+            EXPECT_EQ(lhs[k+half], resHi[k]);
+            EXPECT_EQ(rhs[k+half], resHi[k+half]);
+        }
+
+        return *this;
+    }
+
+    // v_extract<s>: output lane i comes from a[i + s] while that index is inside
+    // a, and from b otherwise.
+    template<int s>
+    TheTest & test_extract()
+    {
+        Data<R> lhs, rhs;
+        rhs *= 10;
+        R a = lhs, b = rhs;
+
+        Data<R> extracted = v_extract<s>(a, b);
+
+        for (int k = 0; k < R::nlanes; ++k)
+        {
+            if (k + s < R::nlanes)
+                EXPECT_EQ(lhs[k + s], extracted[k]);
+            else
+                EXPECT_EQ(rhs[k - R::nlanes + s], extracted[k]);
+        }
+
+        return *this;
+    }
+
+    // Rounding conversions (v_round, v_trunc, v_floor, v_ceil) and the float
+    // helpers v_magnitude, v_sqr_magnitude and v_muladd.
+    TheTest & test_float_math()
+    {
+        typedef typename RegTrait<R>::int_reg Ri;
+        Data<R> x, y, z;
+        x *= 1.1;
+        y += 10;
+        R a1 = x, a2 = y, a3 = z;
+
+        Data<Ri> rounded = v_round(a1);
+        Data<Ri> truncated = v_trunc(a1);
+        Data<Ri> floored = v_floor(a1);
+        Data<Ri> ceiled = v_ceil(a1);
+
+        Data<R> magnitude = v_magnitude(a1, a2);
+        Data<R> sqrMagnitude = v_sqr_magnitude(a1, a2);
+        Data<R> mulAdd = v_muladd(a1, a2, a3);
+
+        for (int k = 0; k < R::nlanes; ++k)
+        {
+            EXPECT_EQ(cvRound(x[k]), rounded[k]);
+            EXPECT_EQ((typename Ri::lane_type)x[k], truncated[k]);
+            EXPECT_EQ(cvFloor(x[k]), floored[k]);
+            EXPECT_EQ(cvCeil(x[k]), ceiled[k]);
+
+            EXPECT_DOUBLE_EQ(std::sqrt(x[k]*x[k] + y[k]*y[k]), magnitude[k]);
+            EXPECT_DOUBLE_EQ(x[k]*x[k] + y[k]*y[k], sqrMagnitude[k]);
+            EXPECT_DOUBLE_EQ(x[k]*y[k] + z[k], mulAdd[k]);
+        }
+
+        return *this;
+    }
+
+    // v_cvt_f32: only the lanes present in both source and destination are compared.
+    TheTest & test_float_cvt32()
+    {
+        typedef v_float32x4 Rt;
+        Data<R> src;
+        src *= 1.1;
+        R a = src;
+        Rt converted = v_cvt_f32(a);
+        Data<Rt> out = converted;
+        int common = std::min<int>(Rt::nlanes, R::nlanes);
+        for (int k = 0; k < common; ++k)
+            EXPECT_EQ((typename Rt::lane_type)src[k], out[k]);
+        return *this;
+    }
+
+    // v_cvt_f64: compiled to a no-op unless CV_SIMD128_64F is non-zero.
+    TheTest & test_float_cvt64()
+    {
+#if CV_SIMD128_64F
+        typedef v_float64x2 Rt;
+        Data<R> src;
+        src *= 1.1;
+        R a = src;
+        Rt converted = v_cvt_f64(a);
+        Data<Rt> out = converted;
+        int common = std::min<int>(Rt::nlanes, R::nlanes);
+        for (int k = 0; k < common; ++k)
+            EXPECT_EQ((typename Rt::lane_type)src[k], out[k]);
+#endif
+        return *this;
+    }
+
+    // v_matmul: output lane i is v[0]*m0[i] + v[1]*m1[i] + v[2]*m2[i] + v[3]*m3[i].
+    TheTest & test_matmul()
+    {
+        Data<R> vec, m0, m1, m2, m3;
+        m1.reverse();
+        m2 += 2;
+        m3 *= 0.3;
+        R v = vec, a = m0, b = m1, c = m2, d = m3;
+
+        Data<R> product = v_matmul(v, a, b, c, d);
+        for (int k = 0; k < R::nlanes; ++k)
+        {
+            LaneType expected = vec[0] * m0[k]
+                                + vec[1] * m1[k]
+                                + vec[2] * m2[k]
+                                + vec[3] * m3[k];
+            EXPECT_DOUBLE_EQ(expected, product[k]);
+        }
+        return *this;
+    }
+
+    // v_transpose4x4: lane i of input row j must end up in lane j of output row i.
+    TheTest & test_transpose()
+    {
+        Data<R> row0, row1, row2, row3;
+        row1 *= 5;
+        row2 *= 10;
+        row3 *= 15;
+        R a = row0, b = row1, c = row2, d = row3;
+        R e, f, g, h;
+        v_transpose4x4(a, b, c, d, e, f, g, h);
+
+        Data<R> cols[4] = {e, f, g, h};
+        for (int k = 0; k < R::nlanes; ++k)
+        {
+            EXPECT_EQ(row0[k], cols[k][0]);
+            EXPECT_EQ(row1[k], cols[k][1]);
+            EXPECT_EQ(row2[k], cols[k][2]);
+            EXPECT_EQ(row3[k], cols[k][3]);
+        }
+        return *this;
+    }
+
+};
+
+
+//=============  8-bit integer =====================================================================
+
+// Runs every check that applies to v_uint8x16.
+TEST(hal_intrin, uint8x16) {
+    TheTest<v_uint8x16> suite;
+    suite.test_loadstore();
+    suite.test_interleave();
+    suite.test_expand();
+    suite.test_expand_q();
+    suite.test_addsub();
+    suite.test_addsub_wrap();
+    suite.test_cmp();
+    suite.test_logic();
+    suite.test_min_max();
+    suite.test_absdiff();
+    suite.test_mask();
+    suite.test_pack<1>(); suite.test_pack<2>(); suite.test_pack<3>(); suite.test_pack<8>();
+    suite.test_pack_u<1>(); suite.test_pack_u<2>(); suite.test_pack_u<3>(); suite.test_pack_u<8>();
+    suite.test_unpack();
+    suite.test_extract<0>(); suite.test_extract<1>(); suite.test_extract<8>(); suite.test_extract<15>();
+}
+
+// Runs every check that applies to v_int8x16.
+TEST(hal_intrin, int8x16) {
+    TheTest<v_int8x16> suite;
+    suite.test_loadstore();
+    suite.test_interleave();
+    suite.test_expand();
+    suite.test_expand_q();
+    suite.test_addsub();
+    suite.test_addsub_wrap();
+    suite.test_cmp();
+    suite.test_logic();
+    suite.test_min_max();
+    suite.test_absdiff();
+    suite.test_mask();
+    suite.test_pack<1>(); suite.test_pack<2>(); suite.test_pack<3>(); suite.test_pack<8>();
+    suite.test_unpack();
+    suite.test_extract<0>(); suite.test_extract<1>(); suite.test_extract<8>(); suite.test_extract<15>();
+}
+
+//============= 16-bit integer =====================================================================
+
+TEST(hal_intrin, uint16x8) { // v_uint16x8 chain: includes mul, mul_expand and shift checks; test_expand_q is not called
+    TheTest<v_uint16x8>()
+        .test_loadstore()
+        .test_interleave()
+        .test_expand()
+        .test_addsub()
+        .test_addsub_wrap()
+        .test_mul()
+        .test_mul_expand()
+        .test_cmp()
+        .test_shift<1>()
+        .test_shift<8>()
+        .test_logic()
+        .test_min_max()
+        .test_absdiff()
+        .test_mask()
+        .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
+        .test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>()
+        .test_unpack()
+        .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
+        ;
+}
+
+TEST(hal_intrin, int16x8) { // v_int16x8 chain: like uint16x8 but calls test_dot_prod and omits test_pack_u
+    TheTest<v_int16x8>()
+        .test_loadstore()
+        .test_interleave()
+        .test_expand()
+        .test_addsub()
+        .test_addsub_wrap()
+        .test_mul()
+        .test_mul_expand()
+        .test_cmp()
+        .test_shift<1>()
+        .test_shift<8>()
+        .test_dot_prod()
+        .test_logic()
+        .test_min_max()
+        .test_absdiff()
+        .test_mask()
+        .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
+        .test_unpack()
+        .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
+        ;
+}
+
+//============= 32-bit integer =====================================================================
+
+TEST(hal_intrin, uint32x4) { // v_uint32x4 chain: no addsub_wrap; calls test_reduce and test_transpose
+    TheTest<v_uint32x4>()
+        .test_loadstore()
+        .test_interleave()
+        .test_expand()
+        .test_addsub()
+        .test_mul()
+        .test_mul_expand()
+        .test_cmp()
+        .test_shift<1>()
+        .test_shift<8>()
+        .test_logic()
+        .test_min_max()
+        .test_absdiff()
+        .test_reduce()
+        .test_mask()
+        .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
+        .test_unpack()
+        .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
+        .test_transpose()
+        ;
+}
+
+TEST(hal_intrin, int32x4) { // v_int32x4 chain: calls test_float_cvt32 and test_float_cvt64; test_mul_expand is not called
+    TheTest<v_int32x4>()
+        .test_loadstore()
+        .test_interleave()
+        .test_expand()
+        .test_addsub()
+        .test_mul()
+        .test_cmp()
+        .test_shift<1>().test_shift<8>()
+        .test_logic()
+        .test_min_max()
+        .test_absdiff()
+        .test_reduce()
+        .test_mask()
+        .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
+        .test_unpack()
+        .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
+        .test_float_cvt32()
+        .test_float_cvt64()
+        .test_transpose()
+        ;
+}
+
+//============= 64-bit integer =====================================================================
+
+TEST(hal_intrin, uint64x2) { // reduced chain for v_uint64x2: load/store, add/sub, shifts, logic and extract only
+    TheTest<v_uint64x2>()
+        .test_loadstore()
+        .test_addsub()
+        .test_shift<1>().test_shift<8>()
+        .test_logic()
+        .test_extract<0>().test_extract<1>()
+        ;
+}
+
+TEST(hal_intrin, int64x2) { // reduced chain for v_int64x2: load/store, add/sub, shifts, logic and extract only
+    TheTest<v_int64x2>()
+        .test_loadstore()
+        .test_addsub()
+        .test_shift<1>().test_shift<8>()
+        .test_logic()
+        .test_extract<0>().test_extract<1>()
+        ;
+}
+
+//============= Floating point =====================================================================
+
+TEST(hal_intrin, float32x4) { // v_float32x4 chain: calls the float-only checks (div, sqrt_abs, float_absdiff, float_math, matmul)
+    TheTest<v_float32x4>()
+        .test_loadstore()
+        .test_interleave()
+        .test_addsub()
+        .test_mul()
+        .test_div()
+        .test_cmp()
+        .test_sqrt_abs()
+        .test_min_max()
+        .test_float_absdiff()
+        .test_reduce()
+        .test_mask()
+        .test_unpack()
+        .test_float_math()
+        .test_float_cvt64()
+        .test_matmul()
+        .test_transpose()
+        ;
+}
+
+#if CV_SIMD128_64F
+TEST(hal_intrin, float64x2) { // v_float64x2 chain; compiled only when the enclosing #if CV_SIMD128_64F is true
+    TheTest<v_float64x2>()
+        .test_loadstore()
+        .test_addsub()
+        .test_mul()
+        .test_div()
+        .test_cmp()
+        .test_sqrt_abs()
+        .test_min_max()
+        .test_float_absdiff()
+        .test_mask()
+        .test_unpack()
+        .test_float_math()
+        .test_float_cvt32()
+        ;
+}
+#endif
--- a/modules/hal/test/test_intrin_utils.hpp
+++ b/modules/hal/test/test_intrin_utils.hpp
@ -0,0 +1,234 @@
+#ifndef _TEST_UTILS_HPP_
+#define _TEST_UTILS_HPP_
+
+#include "opencv2/hal/intrin.hpp"
+#include "opencv2/ts.hpp"
+#include <ostream>
+#include <algorithm>
+
+template <typename R> struct Data;
+template <int N> struct initializer;
+
+template <> struct initializer<16> // 16-lane case: builds a register from the 16 elements of a Data
+{
+    template <typename R> static R init(const Data<R> & d)
+    {
+        return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15]); // uses R's 16-argument constructor
+    }
+};
+
+template <> struct initializer<8> // 8-lane case: builds a register from the 8 elements of a Data
+{
+    template <typename R> static R init(const Data<R> & d)
+    {
+        return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7]); // uses R's 8-argument constructor
+    }
+};
+
+template <> struct initializer<4> // 4-lane case: builds a register from the 4 elements of a Data
+{
+    template <typename R> static R init(const Data<R> & d)
+    {
+        return R(d[0], d[1], d[2], d[3]); // uses R's 4-argument constructor
+    }
+};
+
+template <> struct initializer<2> // 2-lane case: builds a register from the 2 elements of a Data
+{
+    template <typename R> static R init(const Data<R> & d)
+    {
+        return R(d[0], d[1]); // uses R's 2-argument constructor
+    }
+};
+
+//==================================================================================================
+
+template <typename R> struct Data // plain-array mirror of one register R: R::nlanes elements of R::lane_type
+{
+    typedef typename R::lane_type LaneType;
+    Data() // default pattern: lane i holds the value i + 1
+    {
+        for (int i = 0; i < R::nlanes; ++i)
+            d[i] = (LaneType)(i + 1);
+    }
+    Data(LaneType val) // every lane set to val
+    {
+        fill(val);
+    }
+    Data(const R & r) // copies the lanes out of register r (delegates to operator=)
+    {
+        *this = r;
+    }
+    operator R () const // builds a register from d; const because initializer::init only needs a const reference
+    {
+        return initializer<R::nlanes>().init(*this);
+    }
+    Data<R> & operator=(const R & r) // stores register r into d with v_store
+    {
+        v_store(d, r);
+        return *this;
+    }
+    template <typename T> Data<R> & operator*=(T m) // multiplies every lane by m, after casting m to LaneType
+    {
+        for (int i = 0; i < R::nlanes; ++i)
+            d[i] *= (LaneType)m;
+        return *this;
+    }
+    template <typename T> Data<R> & operator+=(T m) // adds m to every lane, after casting m to LaneType
+    {
+        for (int i = 0; i < R::nlanes; ++i)
+            d[i] += (LaneType)m;
+        return *this;
+    }
+    void fill(LaneType val) // overwrites every lane with val
+    {
+        for (int i = 0; i < R::nlanes; ++i)
+            d[i] = val;
+    }
+    void reverse() // reverses the lane order in place
+    {
+        for (int i = 0; i < R::nlanes / 2; ++i)
+            std::swap(d[i], d[R::nlanes - i - 1]);
+    }
+    const LaneType & operator[](int i) const // bounds-checked read access to lane i
+    {
+        CV_Assert(i >= 0 && i < R::nlanes);
+        return d[i];
+    }
+    LaneType & operator[](int i) // bounds-checked write access to lane i
+    {
+        CV_Assert(i >= 0 && i < R::nlanes);
+        return d[i];
+    }
+    const LaneType * mid() const // pointer to element nlanes / 2, i.e. the start of the upper half
+    {
+        return d + R::nlanes / 2;
+    }
+    LaneType * mid() // mutable variant of the accessor above
+    {
+        return d + R::nlanes / 2;
+    }
+    bool operator==(const Data<R> & other) const // lane-by-lane equality
+    {
+        for (int i = 0; i < R::nlanes; ++i)
+            if (d[i] != other.d[i])
+                return false;
+        return true;
+    }
+    void clear() // sets every lane to 0
+    {
+        fill(0);
+    }
+    bool isZero() const // true when every lane equals 0
+    {
+        return isValue(0);
+    }
+    bool isValue(LaneType val) const // true when every lane equals val; takes LaneType (like fill) so negative or wider-than-8-bit values are not truncated
+    {
+        for (int i = 0; i < R::nlanes; ++i)
+            if (d[i] != val)
+                return false;
+        return true;
+    }
+
+    LaneType d[R::nlanes]; // lane storage, one element per register lane
+};
+
+template<typename R> struct AlignedData // bundles one aligned and one unaligned Data<R> (per the member comments)
+{
+    Data<R> CV_DECL_ALIGNED(16) a; // aligned
+    char dummy; // one-byte spacer between a and u; presumably what keeps u off a's 16-byte boundary
+    Data<R> u; // unaligned
+};
+
+template <typename R> std::ostream & operator<<(std::ostream & out, const Data<R> & d) // prints the lanes as "{ a, b, ... }"
+{
+    out << "{ ";
+    for (int i = 0; i < R::nlanes; ++i)
+    {
+        // out << std::hex << +V_TypeTraits<typename R::lane_type>::reinterpret_int(d.d[i]);
+        out << +d.d[i]; // unary + promotes char-sized lanes to int so they print as numbers, not characters
+        if (i + 1 < R::nlanes) // separator after every lane except the last
+            out << ", ";
+    }
+    out << " }";
+    return out;
+}
+
+//==================================================================================================
+
+template <typename R> struct RegTrait;
+
+template <> struct RegTrait<cv::v_uint8x16> { // related register types and constant builders for v_uint8x16
+    typedef cv::v_uint16x8 w_reg; // lanes twice as wide
+    typedef cv::v_uint32x4 q_reg; // lanes four times as wide
+    typedef cv::v_uint8x16 u_reg; // maps to the type itself
+    static cv::v_uint8x16 zero() { return cv::v_setzero_u8(); }
+    static cv::v_uint8x16 all(uchar val) { return cv::v_setall_u8(val); }
+};
+template <> struct RegTrait<cv::v_int8x16> { // related register types and constant builders for v_int8x16
+    typedef cv::v_int16x8 w_reg; // lanes twice as wide
+    typedef cv::v_int32x4 q_reg; // lanes four times as wide
+    typedef cv::v_uint8x16 u_reg; // unsigned register with the same lane count
+    static cv::v_int8x16 zero() { return cv::v_setzero_s8(); }
+    static cv::v_int8x16 all(schar val) { return cv::v_setall_s8(val); }
+};
+
+template <> struct RegTrait<cv::v_uint16x8> { // related register types and constant builders for v_uint16x8
+    typedef cv::v_uint32x4 w_reg; // lanes twice as wide
+    typedef cv::v_int16x8 int_reg; // signed register with the same lane count
+    typedef cv::v_uint16x8 u_reg; // maps to the type itself
+    static cv::v_uint16x8 zero() { return cv::v_setzero_u16(); }
+    static cv::v_uint16x8 all(ushort val) { return cv::v_setall_u16(val); }
+};
+
+template <> struct RegTrait<cv::v_int16x8> { // related register types and constant builders for v_int16x8
+    typedef cv::v_int32x4 w_reg; // lanes twice as wide
+    typedef cv::v_uint16x8 u_reg; // unsigned register with the same lane count
+    static cv::v_int16x8 zero() { return cv::v_setzero_s16(); }
+    static cv::v_int16x8 all(short val) { return cv::v_setall_s16(val); }
+};
+
+template <> struct RegTrait<cv::v_uint32x4> { // related register types and constant builders for v_uint32x4
+    typedef cv::v_uint64x2 w_reg; // lanes twice as wide
+    typedef cv::v_int32x4 int_reg; // signed register with the same lane count
+    typedef cv::v_uint32x4 u_reg; // maps to the type itself
+    static cv::v_uint32x4 zero() { return cv::v_setzero_u32(); }
+    static cv::v_uint32x4 all(unsigned val) { return cv::v_setall_u32(val); }
+};
+
+template <> struct RegTrait<cv::v_int32x4> { // related register types and constant builders for v_int32x4
+    typedef cv::v_int64x2 w_reg; // lanes twice as wide
+    typedef cv::v_uint32x4 u_reg; // unsigned register with the same lane count
+    static cv::v_int32x4 zero() { return cv::v_setzero_s32(); }
+    static cv::v_int32x4 all(int val) { return cv::v_setall_s32(val); }
+};
+
+template <> struct RegTrait<cv::v_uint64x2> { // only constant builders for v_uint64x2; no related-type typedefs are defined
+    static cv::v_uint64x2 zero() { return cv::v_setzero_u64(); }
+    static cv::v_uint64x2 all(uint64 val) { return cv::v_setall_u64(val); }
+};
+
+template <> struct RegTrait<cv::v_int64x2> { // only constant builders for v_int64x2; no related-type typedefs are defined
+    static cv::v_int64x2 zero() { return cv::v_setzero_s64(); }
+    static cv::v_int64x2 all(int64 val) { return cv::v_setall_s64(val); }
+};
+
+template <> struct RegTrait<cv::v_float32x4> { // related register types and constant builders for v_float32x4
+    typedef cv::v_int32x4 int_reg; // integer register with the same lane count
+    typedef cv::v_float32x4 u_reg; // maps to the type itself
+    static cv::v_float32x4 zero() { return cv::v_setzero_f32(); }
+    static cv::v_float32x4 all(float val) { return cv::v_setall_f32(val); }
+};
+
+#if CV_SIMD128_64F
+template <> struct RegTrait<cv::v_float64x2> { // related register types and constant builders for v_float64x2
+    typedef cv::v_int32x4 int_reg; // NOTE(review): 4 x 32-bit lanes, not 2 x 64-bit — presumably matches the integer conversion result type; confirm
+    typedef cv::v_float64x2 u_reg; // maps to the type itself
+    static cv::v_float64x2 zero() { return cv::v_setzero_f64(); }
+    static cv::v_float64x2 all(double val) { return cv::v_setall_f64(val); }
+};
+
+#endif
+
+#endif
--- a/modules/hal/test/test_main.cpp
+++ b/modules/hal/test/test_main.cpp
@ -0,0 +1,3 @@
+#include "opencv2/ts.hpp"
+
+CV_TEST_MAIN("cv") // presumably expands to this test binary's entry point (macro from opencv2/ts.hpp) — TODO confirm what the "cv" argument selects
--- a/modules/hal/test/test_precomp.hpp
+++ b/modules/hal/test/test_precomp.hpp
@ -0,0 +1,11 @@
+#ifndef __OPENCV_HAL_TEST_PRECOMP_HPP__
+#define __OPENCV_HAL_TEST_PRECOMP_HPP__
+
+#include <iostream>
+#include <limits>
+#include "opencv2/ts.hpp"
+#include "opencv2/hal.hpp"
+#include "opencv2/hal/defs.h"
+#include "opencv2/hal/intrin.hpp"
+
+#endif
--- a/modules/highgui/include/opencv2/highgui.hpp
+++ b/modules/highgui/include/opencv2/highgui.hpp
@ -82,42 +82,44 @@ It provides easy interface to:
    See below the example used to generate the figure:
    @code
        int main(int argc, char *argv[])
+        {
+
            int value = 50;
            int value2 = 0;

-            cvNamedWindow("main1",CV_WINDOW_NORMAL);
-            cvNamedWindow("main2",CV_WINDOW_AUTOSIZE | CV_GUI_NORMAL);

-            cvCreateTrackbar( "track1", "main1", &value, 255,  NULL);//OK tested
-            char* nameb1 = "button1";
-            char* nameb2 = "button2";
-            cvCreateButton(nameb1,callbackButton,nameb1,CV_CHECKBOX,1);
+            namedWindow("main1",WINDOW_NORMAL);
+            namedWindow("main2",WINDOW_AUTOSIZE | CV_GUI_NORMAL);
+            createTrackbar( "track1", "main1", &value, 255,  NULL);
+
+            String nameb1 = "button1";
+            String nameb2 = "button2";

-            cvCreateButton(nameb2,callbackButton,nameb2,CV_CHECKBOX,0);
-            cvCreateTrackbar( "track2", NULL, &value2, 255, NULL);
-            cvCreateButton("button5",callbackButton1,NULL,CV_RADIOBOX,0);
-            cvCreateButton("button6",callbackButton2,NULL,CV_RADIOBOX,1);
+            createButton(nameb1,callbackButton,&nameb1,QT_CHECKBOX,1);
+            createButton(nameb2,callbackButton,NULL,QT_CHECKBOX,0);
+            createTrackbar( "track2", NULL, &value2, 255, NULL);
+            createButton("button5",callbackButton1,NULL,QT_RADIOBOX,0);
+            createButton("button6",callbackButton2,NULL,QT_RADIOBOX,1);

-            cvSetMouseCallback( "main2",on_mouse,NULL );
+            setMouseCallback( "main2",on_mouse,NULL );

-            IplImage* img1 = cvLoadImage("files/flower.jpg");
-            IplImage* img2 = cvCreateImage(cvGetSize(img1),8,3);
-            CvCapture* video = cvCaptureFromFile("files/hockey.avi");
-            IplImage* img3 = cvCreateImage(cvGetSize(cvQueryFrame(video)),8,3);
+            Mat img1 = imread("files/flower.jpg");
+            VideoCapture video;
+            video.open("files/hockey.avi");

-            while(cvWaitKey(33) != 27)
+            Mat img2,img3;
+
+            while( waitKey(33) != 27 )
            {
-                cvAddS(img1,cvScalarAll(value),img2);
-                cvAddS(cvQueryFrame(video),cvScalarAll(value2),img3);
-                cvShowImage("main1",img2);
-                cvShowImage("main2",img3);
+                img1.convertTo(img2,-1,1,value);
+                video >> img3;
+
+                imshow("main1",img2);
+                imshow("main2",img3);
            }

-            cvDestroyAllWindows();
-            cvReleaseImage(&img1);
-            cvReleaseImage(&img2);
-            cvReleaseImage(&img3);
-            cvReleaseCapture(&video);
+            destroyAllWindows();
+
            return 0;
        }
    @endcode
@ -140,7 +142,7 @@ It provides easy interface to:

            cv::Mat image = cv::imread("Assets/sample.jpg");
            cv::Mat converted = cv::Mat(image.rows, image.cols, CV_8UC4);
-            cvtColor(image, converted, CV_BGR2BGRA);
+            cv::cvtColor(image, converted, COLOR_BGR2BGRA);
            cv::imshow(windowName, converted); // this will create window if it hasn't been created before

            int state = 42;
@ -174,79 +176,100 @@ namespace cv

 //! Flags for cv::namedWindow
 enum WindowFlags {
-       WINDOW_NORMAL     = 0x00000000, //!< the user can resize the window (no constraint) / also use to switch a fullscreen window to a normal size
-       WINDOW_AUTOSIZE   = 0x00000001, //!< the user cannot resize the window, the size is constrainted by the image displayed
-       WINDOW_OPENGL     = 0x00001000, //!< window with opengl support
+       WINDOW_NORMAL     = 0x00000000, //!< the user can resize the window (no constraint) / also use to switch a fullscreen window to a normal size.
+       WINDOW_AUTOSIZE   = 0x00000001, //!< the user cannot resize the window, the size is constrained by the image displayed.
+       WINDOW_OPENGL     = 0x00001000, //!< window with opengl support.

-       WINDOW_FULLSCREEN = 1,          //!< change the window to fullscreen
-       WINDOW_FREERATIO  = 0x00000100, //!< the image expends as much as it can (no ratio constraint)
-       WINDOW_KEEPRATIO  = 0x00000000  //!< the ratio of the image is respected
+       WINDOW_FULLSCREEN = 1,          //!< change the window to fullscreen.
+       WINDOW_FREERATIO  = 0x00000100, //!< the image expands as much as it can (no ratio constraint).
+       WINDOW_KEEPRATIO  = 0x00000000  //!< the ratio of the image is respected.
     };

 //! Flags for cv::setWindowProperty / cv::getWindowProperty
 enum WindowPropertyFlags {
-       WND_PROP_FULLSCREEN   = 0, //!< fullscreen property    (can be WINDOW_NORMAL or WINDOW_FULLSCREEN)
-       WND_PROP_AUTOSIZE     = 1, //!< autosize property      (can be WINDOW_NORMAL or WINDOW_AUTOSIZE)
-       WND_PROP_ASPECT_RATIO = 2, //!< window's aspect ration (can be set to WINDOW_FREERATIO or WINDOW_KEEPRATIO);
-       WND_PROP_OPENGL       = 3  //!< opengl support
+       WND_PROP_FULLSCREEN   = 0, //!< fullscreen property    (can be WINDOW_NORMAL or WINDOW_FULLSCREEN).
+       WND_PROP_AUTOSIZE     = 1, //!< autosize property      (can be WINDOW_NORMAL or WINDOW_AUTOSIZE).
+       WND_PROP_ASPECT_RATIO = 2, //!< window's aspect ratio (can be set to WINDOW_FREERATIO or WINDOW_KEEPRATIO).
+       WND_PROP_OPENGL       = 3  //!< opengl support.
     };

-enum { EVENT_MOUSEMOVE      = 0,
-       EVENT_LBUTTONDOWN    = 1,
-       EVENT_RBUTTONDOWN    = 2,
-       EVENT_MBUTTONDOWN    = 3,
-       EVENT_LBUTTONUP      = 4,
-       EVENT_RBUTTONUP      = 5,
-       EVENT_MBUTTONUP      = 6,
-       EVENT_LBUTTONDBLCLK  = 7,
-       EVENT_RBUTTONDBLCLK  = 8,
-       EVENT_MBUTTONDBLCLK  = 9,
-       EVENT_MOUSEWHEEL     = 10,
-       EVENT_MOUSEHWHEEL    = 11
+//! Mouse events; see cv::MouseCallback
+enum MouseEventTypes {
+       EVENT_MOUSEMOVE      = 0, //!< indicates that the mouse pointer has moved over the window.
+       EVENT_LBUTTONDOWN    = 1, //!< indicates that the left mouse button is pressed.
+       EVENT_RBUTTONDOWN    = 2, //!< indicates that the right mouse button is pressed.
+       EVENT_MBUTTONDOWN    = 3, //!< indicates that the middle mouse button is pressed.
+       EVENT_LBUTTONUP      = 4, //!< indicates that left mouse button is released.
+       EVENT_RBUTTONUP      = 5, //!< indicates that right mouse button is released.
+       EVENT_MBUTTONUP      = 6, //!< indicates that middle mouse button is released.
+       EVENT_LBUTTONDBLCLK  = 7, //!< indicates that left mouse button is double clicked.
+       EVENT_RBUTTONDBLCLK  = 8, //!< indicates that right mouse button is double clicked.
+       EVENT_MBUTTONDBLCLK  = 9, //!< indicates that middle mouse button is double clicked.
+       EVENT_MOUSEWHEEL     = 10,//!< positive and negative values mean forward and backward scrolling, respectively.
+       EVENT_MOUSEHWHEEL    = 11 //!< positive and negative values mean right and left scrolling, respectively.
     };

-enum { EVENT_FLAG_LBUTTON   = 1,
-       EVENT_FLAG_RBUTTON   = 2,
-       EVENT_FLAG_MBUTTON   = 4,
-       EVENT_FLAG_CTRLKEY   = 8,
-       EVENT_FLAG_SHIFTKEY  = 16,
-       EVENT_FLAG_ALTKEY    = 32
+//! Mouse event flags; see cv::MouseCallback
+enum MouseEventFlags {
+       EVENT_FLAG_LBUTTON   = 1, //!< indicates that the left mouse button is down.
+       EVENT_FLAG_RBUTTON   = 2, //!< indicates that the right mouse button is down.
+       EVENT_FLAG_MBUTTON   = 4, //!< indicates that the middle mouse button is down.
+       EVENT_FLAG_CTRLKEY   = 8, //!< indicates that CTRL Key is pressed.
+       EVENT_FLAG_SHIFTKEY  = 16,//!< indicates that SHIFT Key is pressed.
+       EVENT_FLAG_ALTKEY    = 32 //!< indicates that ALT Key is pressed.
     };

 //! Qt font weight
 enum QtFontWeights {
-        QT_FONT_LIGHT           = 25, //!< QFont::Light ( Weight of 25 )
-        QT_FONT_NORMAL          = 50, //!< QFont::Normal ( Weight of 50 )
-        QT_FONT_DEMIBOLD        = 63, //!< QFont::DemiBold ( Weight of 63 )
-        QT_FONT_BOLD            = 75, //!< QFont::Bold ( Weight of 75 )
-        QT_FONT_BLACK           = 87  //!< QFont::Black ( Weight of 87 )
+        QT_FONT_LIGHT           = 25, //!< Weight of 25
+        QT_FONT_NORMAL          = 50, //!< Weight of 50
+        QT_FONT_DEMIBOLD        = 63, //!< Weight of 63
+        QT_FONT_BOLD            = 75, //!< Weight of 75
+        QT_FONT_BLACK           = 87  //!< Weight of 87
     };

 //! Qt font style
 enum QtFontStyles {
-        QT_STYLE_NORMAL         = 0, //!< QFont::StyleNormal
-        QT_STYLE_ITALIC         = 1, //!< QFont::StyleItalic
-        QT_STYLE_OBLIQUE        = 2  //!< QFont::StyleOblique
+        QT_STYLE_NORMAL         = 0, //!< Normal font.
+        QT_STYLE_ITALIC         = 1, //!< Italic font.
+        QT_STYLE_OBLIQUE        = 2  //!< Oblique font.
     };

 //! Qt "button" type
 enum QtButtonTypes {
-       QT_PUSH_BUTTON = 0, //!< Push button
-       QT_CHECKBOX    = 1, //!< Checkbox button
-       QT_RADIOBOX    = 2  //!< Radiobox button
+       QT_PUSH_BUTTON = 0, //!< Push button.
+       QT_CHECKBOX    = 1, //!< Checkbox button.
+       QT_RADIOBOX    = 2  //!< Radiobox button.
     };

-
+/** @brief Callback function for mouse events. See cv::setMouseCallback
+@param event one of the cv::MouseEventTypes constants.
+@param x The x-coordinate of the mouse event.
+@param y The y-coordinate of the mouse event.
+@param flags one of the cv::MouseEventFlags constants.
+@param userdata The optional parameter.
+ */
 typedef void (*MouseCallback)(int event, int x, int y, int flags, void* userdata);
+
+/** @brief Callback function for a trackbar. See cv::createTrackbar
+@param pos current position of the specified trackbar.
+@param userdata The optional parameter.
+ */
 typedef void (*TrackbarCallback)(int pos, void* userdata);
+
+/** @brief Callback function defined to be called every frame. See cv::setOpenGlDrawCallback
+@param userdata The optional parameter.
+ */
 typedef void (*OpenGlDrawCallback)(void* userdata);
+
+/** @brief Callback function for a button created by cv::createButton
+@param state current state of the button. It could be -1 for a push button, 0 or 1 for a check/radio box button.
+@param userdata The optional parameter.
+ */
 typedef void (*ButtonCallback)(int state, void* userdata);

 /** @brief Creates a window.

-@param winname Name of the window in the window caption that may be used as a window identifier.
-@param flags Flags of the window. The supported flags are: (cv::WindowFlags)
-
 The function namedWindow creates a window that can be used as a placeholder for images and
 trackbars. Created windows are referred to by their names.

@ -267,14 +290,17 @@ Qt backend supports additional flags:
 -   **CV_GUI_NORMAL or CV_GUI_EXPANDED:** CV_GUI_NORMAL is the old way to draw the window
     without statusbar and toolbar, whereas CV_GUI_EXPANDED is a new enhanced GUI.
 By default, flags == WINDOW_AUTOSIZE | WINDOW_KEEPRATIO | CV_GUI_EXPANDED
+
+@param winname Name of the window in the window caption that may be used as a window identifier.
+@param flags Flags of the window. The supported flags are: (cv::WindowFlags)
 */
 CV_EXPORTS_W void namedWindow(const String& winname, int flags = WINDOW_AUTOSIZE);

-/** @brief Destroys a window.
-
-@param winname Name of the window to be destroyed.
+/** @brief Destroys the specified window.

 The function destroyWindow destroys the window with the given name.
+
+@param winname Name of the window to be destroyed.
 */
 CV_EXPORTS_W void destroyWindow(const String& winname);

@ -288,8 +314,6 @@ CV_EXPORTS_W int startWindowThread();

 /** @brief Waits for a pressed key.

-@param delay Delay in milliseconds. 0 is the special value that means "forever".
-
 The function waitKey waits for a key event infinitely (when \f$\texttt{delay}\leq 0\f$ ) or for delay
 milliseconds, when it is positive. Since the OS has a minimum time between switching threads, the
 function will not wait exactly delay ms, it will wait at least delay ms, depending on what else is
@ -306,16 +330,15 @@ takes care of event processing.

 The function only works if there is at least one HighGUI window created and the window is active.
 If there are several HighGUI windows, any of them can be active.
+
+@param delay Delay in milliseconds. 0 is the special value that means "forever".
 */
 CV_EXPORTS_W int waitKey(int delay = 0);

 /** @brief Displays an image in the specified window.

-@param winname Name of the window.
-@param mat Image to be shown.
-
 The function imshow displays an image in the specified window. If the window was created with the
-WINDOW_AUTOSIZE flag, the image is shown with its original size, however it is still limited by the screen resolution.
+cv::WINDOW_AUTOSIZE flag, the image is shown with its original size, however it is still limited by the screen resolution.
 Otherwise, the image is scaled to fit the window. The function may scale the image, depending on its depth:

 -   If the image is 8-bit unsigned, it is displayed as is.
@ -324,77 +347,81 @@ Otherwise, the image is scaled to fit the window. The function may scale the ima
 -   If the image is 32-bit floating-point, the pixel values are multiplied by 255. That is, the
    value range [0,1] is mapped to [0,255].

-If window was created with OpenGL support, imshow also support ogl::Buffer , ogl::Texture2D and
+If the window was created with OpenGL support, cv::imshow also supports ogl::Buffer, ogl::Texture2D and
 cuda::GpuMat as input.

-If the window was not created before this function, it is assumed creating a window with WINDOW_AUTOSIZE.
+If the window was not created before calling this function, it is created implicitly, as if with cv::WINDOW_AUTOSIZE.

 If you need to show an image that is bigger than the screen resolution, you will need to call namedWindow("", WINDOW_NORMAL) before the imshow.

@note This function should be followed by cv::waitKey function which displays the image for specified
-milliseconds. Otherwise, it won't display the image. For example, cv::waitKey(0) will display the window
-infinitely until any keypress (it is suitable for image display). cv::waitKey(25) will display a frame
+milliseconds. Otherwise, it won't display the image. For example, **waitKey(0)** will display the window
+infinitely until any keypress (it is suitable for image display). **waitKey(25)** will display a frame
 for 25 ms, after which display will be automatically closed. (If you put it in a loop to read
 videos, it will display the video frame-by-frame)

@note

-[Windows Backend Only] Pressing Ctrl+C will copy the image to the clipboard.
+[__Windows Backend Only__] Pressing Ctrl+C will copy the image to the clipboard.

-[Windows Backend Only] Pressing Ctrl+S will show a dialog to save the image.
+[__Windows Backend Only__] Pressing Ctrl+S will show a dialog to save the image.

+@param winname Name of the window.
+@param mat Image to be shown.
 */
 CV_EXPORTS_W void imshow(const String& winname, InputArray mat);

 /** @brief Resizes window to the specified size

-@param winname Window name
-@param width The new window width
-@param height The new window height
-
@note

 -   The specified window size is for the image area. Toolbars are not counted.
-   Only windows created without WINDOW_AUTOSIZE flag can be resized.
+-   Only windows created without cv::WINDOW_AUTOSIZE flag can be resized.
+
+@param winname Window name.
+@param width The new window width.
+@param height The new window height.
 */
 CV_EXPORTS_W void resizeWindow(const String& winname, int width, int height);

 /** @brief Moves window to the specified position

-@param winname Window name
-@param x The new x-coordinate of the window
-@param y The new y-coordinate of the window
+@param winname Name of the window.
+@param x The new x-coordinate of the window.
+@param y The new y-coordinate of the window.
 */
 CV_EXPORTS_W void moveWindow(const String& winname, int x, int y);

 /** @brief Changes parameters of a window dynamically.

+The function setWindowProperty enables changing properties of a window.
+
@param winname Name of the window.
@param prop_id Window property to edit. The supported operation flags are: (cv::WindowPropertyFlags)
@param prop_value New value of the window property. The supported flags are: (cv::WindowFlags)
-
-The function setWindowProperty enables changing properties of a window.
 */
 CV_EXPORTS_W void setWindowProperty(const String& winname, int prop_id, double prop_value);

 /** @brief Updates window title
+@param winname Name of the window.
+@param title New title.
 */
 CV_EXPORTS_W void setWindowTitle(const String& winname, const String& title);

 /** @brief Provides parameters of a window.

+The function getWindowProperty returns properties of a window.
+
@param winname Name of the window.
@param prop_id Window property to retrieve. The following operation flags are available: (cv::WindowPropertyFlags)

-See setWindowProperty to know the meaning of the returned values.
-
-The function getWindowProperty returns properties of a window.
+@sa setWindowProperty
 */
 CV_EXPORTS_W double getWindowProperty(const String& winname, int prop_id);

 /** @brief Sets mouse handler for the specified window

-@param winname Window name
+@param winname Name of the window.
@param onMouse Mouse callback. See OpenCV samples, such as
 <https://github.com/Itseez/opencv/tree/master/samples/cpp/ffilldemo.cpp>, on how to specify and
 use the callback.
@ -402,18 +429,16 @@ use the callback.
 */
 CV_EXPORTS void setMouseCallback(const String& winname, MouseCallback onMouse, void* userdata = 0);

-/** @brief Gets the mouse-wheel motion delta, when handling mouse-wheel events EVENT_MOUSEWHEEL and
-EVENT_MOUSEHWHEEL.
-
-@param flags The mouse callback flags parameter.
+/** @brief Gets the mouse-wheel motion delta, when handling mouse-wheel events cv::EVENT_MOUSEWHEEL and
+cv::EVENT_MOUSEHWHEEL.

 For regular mice with a scroll-wheel, delta will be a multiple of 120. The value 120 corresponds to
 a one notch rotation of the wheel or the threshold for action to be taken and one such action should
 occur for each delta. Some high-precision mice with higher-resolution freely-rotating wheels may
 generate smaller values.

-For EVENT_MOUSEWHEEL positive and negative values mean forward and backward scrolling,
-respectively. For EVENT_MOUSEHWHEEL, where available, positive and negative values mean right and
+For cv::EVENT_MOUSEWHEEL positive and negative values mean forward and backward scrolling,
+respectively. For cv::EVENT_MOUSEHWHEEL, where available, positive and negative values mean right and
 left scrolling, respectively.

 With the C API, the macro CV_GET_WHEEL_DELTA(flags) can be used alternatively.
@ -421,23 +446,13 @@ With the C API, the macro CV_GET_WHEEL_DELTA(flags) can be used alternatively.
@note

 Mouse-wheel events are currently supported only on Windows.
+
+@param flags The mouse callback flags parameter.
 */
 CV_EXPORTS int getMouseWheelDelta(int flags);

 /** @brief Creates a trackbar and attaches it to the specified window.

-@param trackbarname Name of the created trackbar.
-@param winname Name of the window that will be used as a parent of the created trackbar.
-@param value Optional pointer to an integer variable whose value reflects the position of the
-slider. Upon creation, the slider position is defined by this variable.
-@param count Maximal position of the slider. The minimal position is always 0.
-@param onChange Pointer to the function to be called every time the slider changes position. This
-function should be prototyped as void Foo(int,void\*); , where the first parameter is the trackbar
-position and the second parameter is the user data (see the next parameter). If the callback is
-the NULL pointer, no callbacks are called, but only value is updated.
-@param userdata User data that is passed as is to the callback. It can be used to handle trackbar
-events without using global variables.
-
 The function createTrackbar creates a trackbar (a slider or range control) with the specified name
 and range, assigns a variable value to be a position synchronized with the trackbar and specifies
 the callback function onChange to be called on the trackbar position change. The created trackbar is
@ -445,11 +460,22 @@ displayed in the specified window winname.

@note

-**[Qt Backend Only]** winname can be empty (or NULL) if the trackbar should be attached to the
+[__Qt Backend Only__] winname can be empty (or NULL) if the trackbar should be attached to the
 control panel.

 Clicking the label of each trackbar enables editing the trackbar values manually.

+@param trackbarname Name of the created trackbar.
+@param winname Name of the window that will be used as a parent of the created trackbar.
+@param value Optional pointer to an integer variable whose value reflects the position of the
+slider. Upon creation, the slider position is defined by this variable.
+@param count Maximal position of the slider. The minimal position is always 0.
+@param onChange Pointer to the function to be called every time the slider changes position. This
+function should be prototyped as void Foo(int,void\*); , where the first parameter is the trackbar
+position and the second parameter is the user data (see the next parameter). If the callback is
+the NULL pointer, no callbacks are called, but only value is updated.
+@param userdata User data that is passed as is to the callback. It can be used to handle trackbar
+events without using global variables.
 */
 CV_EXPORTS int createTrackbar(const String& trackbarname, const String& winname,
                              int* value, int count,
@ -458,63 +484,77 @@ CV_EXPORTS int createTrackbar(const String& trackbarname, const String& winname,

 /** @brief Returns the trackbar position.

-@param trackbarname Name of the trackbar.
-@param winname Name of the window that is the parent of the trackbar.
-
 The function returns the current position of the specified trackbar.

@note

-**[Qt Backend Only]** winname can be empty (or NULL) if the trackbar is attached to the control
+[__Qt Backend Only__] winname can be empty (or NULL) if the trackbar is attached to the control
 panel.

+@param trackbarname Name of the trackbar.
+@param winname Name of the window that is the parent of the trackbar.
 */
 CV_EXPORTS_W int getTrackbarPos(const String& trackbarname, const String& winname);

 /** @brief Sets the trackbar position.

-@param trackbarname Name of the trackbar.
-@param winname Name of the window that is the parent of trackbar.
-@param pos New position.
-
 The function sets the position of the specified trackbar in the specified window.

@note

-**[Qt Backend Only]** winname can be empty (or NULL) if the trackbar is attached to the control
+[__Qt Backend Only__] winname can be empty (or NULL) if the trackbar is attached to the control
 panel.
+
+@param trackbarname Name of the trackbar.
+@param winname Name of the window that is the parent of trackbar.
+@param pos New position.
 */
 CV_EXPORTS_W void setTrackbarPos(const String& trackbarname, const String& winname, int pos);

 /** @brief Sets the trackbar maximum position.

+The function sets the maximum position of the specified trackbar in the specified window.
+
+@note
+
+[__Qt Backend Only__] winname can be empty (or NULL) if the trackbar is attached to the control
+panel.
+
@param trackbarname Name of the trackbar.
@param winname Name of the window that is the parent of trackbar.
@param maxval New maximum position.
+ */
+CV_EXPORTS_W void setTrackbarMax(const String& trackbarname, const String& winname, int maxval);

-The function sets the maximum position of the specified trackbar in the specified window.
+/** @brief Sets the trackbar minimum position.
+
+The function sets the minimum position of the specified trackbar in the specified window.

@note

-**[Qt Backend Only]** winname can be empty (or NULL) if the trackbar is attached to the control
+[__Qt Backend Only__] winname can be empty (or NULL) if the trackbar is attached to the control
 panel.
+
+@param trackbarname Name of the trackbar.
+@param winname Name of the window that is the parent of trackbar.
+@param minval New minimum position.
 */
-CV_EXPORTS_W void setTrackbarMax(const String& trackbarname, const String& winname, int maxval);
+CV_EXPORTS_W void setTrackbarMin(const String& trackbarname, const String& winname, int minval);

 //! @addtogroup highgui_opengl OpenGL support
 //! @{

+/** @brief Displays OpenGL 2D texture in the specified window.
+
+@param winname Name of the window.
+@param tex OpenGL 2D texture data.
+ */
 CV_EXPORTS void imshow(const String& winname, const ogl::Texture2D& tex);

 /** @brief Sets a callback function to be called to draw on top of displayed image.

-@param winname Name of the window.
-@param onOpenGlDraw Pointer to the function to be called every frame. This function should be
-prototyped as void Foo(void\*) .
-@param userdata Pointer passed to the callback function. *(Optional)*
-
 The function setOpenGlDrawCallback can be used to draw 3D data on the window. See the example of
-callback function below: :
+callback function below:
@code
    void on_opengl(void* param)
    {
@ -545,18 +585,23 @@ callback function below: :
        }
    }
@endcode
+
+@param winname Name of the window.
+@param onOpenGlDraw Pointer to the function to be called every frame. This function should be
+prototyped as void Foo(void\*) .
+@param userdata Pointer passed to the callback function. (__Optional__)
 */
 CV_EXPORTS void setOpenGlDrawCallback(const String& winname, OpenGlDrawCallback onOpenGlDraw, void* userdata = 0);

 /** @brief Sets the specified window as current OpenGL context.

-@param winname Window name
+@param winname Name of the window.
 */
 CV_EXPORTS void setOpenGlContext(const String& winname);

-/** @brief Force window to redraw its context and call draw callback ( setOpenGlDrawCallback ).
+/** @brief Force window to redraw its context and call draw callback ( See cv::setOpenGlDrawCallback ).

-@param winname Window name
+@param winname Name of the window.
 */
 CV_EXPORTS void updateWindow(const String& winname);

@ -564,102 +609,103 @@ CV_EXPORTS void updateWindow(const String& winname);

 //! @addtogroup highgui_qt
 //! @{
-// Only for Qt

+/** @brief QtFont available only for Qt. See cv::fontQt
+ */
 struct QtFont
 {
-    const char* nameFont;  // Qt: nameFont
-    Scalar      color;     // Qt: ColorFont -> cvScalar(blue_component, green_component, red_component[, alpha_component])
-    int         font_face; // Qt: bool italic
-    const int*  ascii;     // font data and metrics
+    const char* nameFont;  //!< Name of the font
+    Scalar      color;     //!< Color of the font. Scalar(blue_component, green_component, red_component[, alpha_component])
+    int         font_face; //!< See cv::QtFontStyles
+    const int*  ascii;     //!< font data and metrics
    const int*  greek;
    const int*  cyrillic;
    float       hscale, vscale;
-    float       shear;     // slope coefficient: 0 - normal, >0 - italic
-    int         thickness; // Qt: weight
-    float       dx;        // horizontal interval between letters
-    int         line_type; // Qt: PointSize
+    float       shear;     //!< slope coefficient: 0 - normal, >0 - italic
+    int         thickness; //!< See cv::QtFontWeights
+    float       dx;        //!< horizontal interval between letters
+    int         line_type; //!< PointSize
 };

 /** @brief Creates the font to draw a text on an image.

+The function fontQt creates a cv::QtFont object. This cv::QtFont is not compatible with putText .
+
+A basic usage of this function is the following:
+@code
+    QtFont font = fontQt("Times");
+    addText( img1, "Hello World !", Point(50,50), font);
+@endcode
+
@param nameFont Name of the font. The name should match the name of a system font (such as
 *Times*). If the font is not found, a default one is used.
@param pointSize Size of the font. If not specified, equal zero or negative, the point size of the
 font is set to a system-dependent default value. Generally, this is 12 points.
-@param color Color of the font in BGRA where A = 255 is fully transparent. Use the macro CV _ RGB
+@param color Color of the font in BGRA where A = 255 is fully transparent. Use the macro CV_RGB
 for simplicity.
-@param weight Font weight. Available operation flags are : (cv::QtFontWeights) You can also specify a positive integer for better control.
-@param style Font style. The following operation flags are available: (cv::QtFontStyles)
+@param weight Font weight. Available operation flags are : cv::QtFontWeights. You can also specify a positive integer for better control.
+@param style Font style. Available operation flags are : cv::QtFontStyles
@param spacing Spacing between characters. It can be negative or positive.
-
-The function fontQt creates a QtFont object. This QtFont is not compatible with putText .
-
-A basic usage of this function is the following: :
-@code
-    QtFont font = fontQt(''Times'');
-    addText( img1, ``Hello World !'', Point(50,50), font);
-@endcode
 */
 CV_EXPORTS QtFont fontQt(const String& nameFont, int pointSize = -1,
                         Scalar color = Scalar::all(0), int weight = QT_FONT_NORMAL,
                         int style = QT_STYLE_NORMAL, int spacing = 0);

-/** @brief Creates the font to draw a text on an image.
+/** @brief Draws a text on the image.
+
+The function addText draws *text* on the image *img* using a specific font *font* (see example cv::fontQt
+)

@param img 8-bit 3-channel image where the text should be drawn.
@param text Text to write on an image.
@param org Point(x,y) where the text should start on an image.
@param font Font to use to draw a text.
-
-The function addText draws *text* on an image *img* using a specific font *font* (see example cv::fontQt
-)
 */
 CV_EXPORTS void addText( const Mat& img, const String& text, Point org, const QtFont& font);

 /** @brief Displays a text on a window image as an overlay for a specified duration.

+The function displayOverlay displays useful information/tips on top of the window for a certain
+amount of time *delayms*. The function does not modify the image, displayed in the window, that is,
+after the specified delay the original content of the window is restored.
+
@param winname Name of the window.
@param text Overlay text to write on a window image.
@param delayms The period (in milliseconds), during which the overlay text is displayed. If this
 function is called before the previous overlay text timed out, the timer is restarted and the text
 is updated. If this value is zero, the text never disappears.
-
-The function displayOverlay displays useful information/tips on top of the window for a certain
-amount of time *delayms*. The function does not modify the image, displayed in the window, that is,
-after the specified delay the original content of the window is restored.
 */
 CV_EXPORTS void displayOverlay(const String& winname, const String& text, int delayms = 0);

 /** @brief Displays a text on the window statusbar during the specified period of time.

+The function displayStatusBar displays useful information/tips on top of the window for a certain
+amount of time *delayms* . This information is displayed on the window statusbar (the window must be
+created with the CV_GUI_EXPANDED flags).
+
@param winname Name of the window.
@param text Text to write on the window statusbar.
@param delayms Duration (in milliseconds) to display the text. If this function is called before
 the previous text timed out, the timer is restarted and the text is updated. If this value is
 zero, the text never disappears.
-
-The function displayStatusBar displays useful information/tips on top of the window for a certain
-amount of time *delayms* . This information is displayed on the window statusbar (the window must be
-created with the CV_GUI_EXPANDED flags).
 */
 CV_EXPORTS void displayStatusBar(const String& winname, const String& text, int delayms = 0);

 /** @brief Saves parameters of the specified window.

-@param windowName Name of the window.
-
 The function saveWindowParameters saves size, location, flags, trackbars value, zoom and panning
-location of the window window_name .
+location of the window windowName.
+
+@param windowName Name of the window.
 */
 CV_EXPORTS void saveWindowParameters(const String& windowName);

 /** @brief Loads parameters of the specified window.

-@param windowName Name of the window.
-
 The function loadWindowParameters loads size, location, flags, trackbars value, zoom and panning
-location of the window window_name .
+location of the window windowName.
+
+@param windowName Name of the window.
 */
 CV_EXPORTS void loadWindowParameters(const String& windowName);

@ -669,21 +715,11 @@ CV_EXPORTS  void stopLoop();

 /** @brief Attaches a button to the control panel.

-@param  bar_name
-   Name of the button.
-@param on_change Pointer to the function to be called every time the button changes its state.
-This function should be prototyped as void Foo(int state,\*void); . *state* is the current state
-of the button. It could be -1 for a push button, 0 or 1 for a check/radio box button.
-@param userdata Pointer passed to the callback function.
-@param type Optional type of the button. Available types are: (cv::QtButtonTypes)
-@param initial_button_state Default state of the button. Use for checkbox and radiobox. Its
-value could be 0 or 1. *(Optional)*
-
 The function createButton attaches a button to the control panel. Each button is added to a
 buttonbar to the right of the last button. A new buttonbar is created if nothing was attached to the
 control panel before, or if the last element attached to the control panel was a trackbar.

-See below various examples of the createButton function call: :
+See below various examples of the cv::createButton function call:
@code
    createButton(NULL,callbackButton);//create a push button "button 0", that will call callbackButton.
    createButton("button2",callbackButton,NULL,QT_CHECKBOX,0);
@ -691,6 +727,15 @@ See below various examples of the createButton function call: :
    createButton("button5",callbackButton1,NULL,QT_RADIOBOX);
    createButton("button6",callbackButton2,NULL,QT_PUSH_BUTTON,1);
@endcode
+
+@param  bar_name Name of the button.
+@param on_change Pointer to the function to be called every time the button changes its state.
+This function should be prototyped as void Foo(int state,void\*); . *state* is the current state
+of the button. It could be -1 for a push button, 0 or 1 for a check/radio box button.
+@param userdata Pointer passed to the callback function.
+@param type Optional type of the button. Available types are: (cv::QtButtonTypes)
+@param initial_button_state Default state of the button. Use for checkbox and radiobox. Its
+value could be 0 or 1. (__Optional__)
 */
 CV_EXPORTS int createButton( const String& bar_name, ButtonCallback on_change,
                             void* userdata = 0, int type = QT_PUSH_BUTTON,
--- a/modules/highgui/include/opencv2/highgui/highgui_c.h
+++ b/modules/highgui/include/opencv2/highgui/highgui_c.h
@ -166,6 +166,7 @@ CVAPI(int) cvCreateTrackbar2( const char* trackbar_name, const char* window_name
 CVAPI(int) cvGetTrackbarPos( const char* trackbar_name, const char* window_name );
 CVAPI(void) cvSetTrackbarPos( const char* trackbar_name, const char* window_name, int pos );
 CVAPI(void) cvSetTrackbarMax(const char* trackbar_name, const char* window_name, int maxval);
+CVAPI(void) cvSetTrackbarMin(const char* trackbar_name, const char* window_name, int minval);

 enum
 {
--- a/modules/highgui/src/window.cpp
+++ b/modules/highgui/src/window.cpp
@ -216,6 +216,11 @@ void cv::setTrackbarMax(const String& trackbarName, const String& winName, int m
    cvSetTrackbarMax(trackbarName.c_str(), winName.c_str(), maxval);
 }

+void cv::setTrackbarMin(const String& trackbarName, const String& winName, int minval)
+{
+    cvSetTrackbarMin(trackbarName.c_str(), winName.c_str(), minval);
+}
+
 int cv::getTrackbarPos( const String& trackbarName, const String& winName )
 {
    return cvGetTrackbarPos(trackbarName.c_str(), winName.c_str());
@ -589,6 +594,11 @@ CV_IMPL void cvSetTrackbarMax(const char*, const char*, int)
    CV_NO_GUI_ERROR( "cvSetTrackbarMax" );
 }

+CV_IMPL void cvSetTrackbarMin(const char*, const char*, int)
+{
+    CV_NO_GUI_ERROR( "cvSetTrackbarMin" );
+}
+
 CV_IMPL void* cvGetWindowHandle( const char* )
 {
    CV_NO_GUI_ERROR( "cvGetWindowHandle" );
--- a/modules/highgui/src/window_QT.cpp
+++ b/modules/highgui/src/window_QT.cpp
@ -664,12 +664,29 @@ CV_IMPL void cvSetTrackbarMax(const char* name_bar, const char* window_name, int
        QPointer<CvTrackbar> t = icvFindTrackBarByName(name_bar, window_name);
        if (t)
        {
+            int minval = t->slider->minimum();
+            maxval = (maxval>minval)?maxval:minval;
            t->slider->setMaximum(maxval);
        }
    }
 }


+CV_IMPL void cvSetTrackbarMin(const char* name_bar, const char* window_name, int minval)
+{
+    if (minval >= 0)
+    {
+        QPointer<CvTrackbar> t = icvFindTrackBarByName(name_bar, window_name);
+        if (t)
+        {
+            int maxval = t->slider->maximum();
+            minval = (maxval<minval)?maxval:minval;
+            t->slider->setMinimum(minval);
+        }
+    }
+}
+
+
 /* assign callback for mouse events */
 CV_IMPL void cvSetMouseCallback(const char* window_name, CvMouseCallback on_mouse, void* param)
 {
--- a/modules/highgui/src/window_QT.h
+++ b/modules/highgui/src/window_QT.h
@ -73,7 +73,6 @@
 #include <QFileDialog>
 #include <QToolBar>
 #include <QAction>
-#include <QPushButton>
 #include <QCheckBox>
 #include <QRadioButton>
 #include <QButtonGroup>
--- a/modules/highgui/src/window_cocoa.mm
+++ b/modules/highgui/src/window_cocoa.mm
@ -62,6 +62,7 @@ CV_IMPL void cvSetMouseCallback( const char* name, CvMouseCallback function, voi
 CV_IMPL int cvGetTrackbarPos( const char* trackbar_name, const char* window_name ) {return 0;}
 CV_IMPL void cvSetTrackbarPos(const char* trackbar_name, const char* window_name, int pos) {}
 CV_IMPL void cvSetTrackbarMax(const char* trackbar_name, const char* window_name, int maxval) {}
+CV_IMPL void cvSetTrackbarMin(const char* trackbar_name, const char* window_name, int minval) {}
 CV_IMPL void* cvGetWindowHandle( const char* name ) {return NULL;}
 CV_IMPL const char* cvGetWindowName( void* window_handle ) {return NULL;}
 CV_IMPL int cvNamedWindow( const char* name, int flags ) {return 0; }
@ -426,7 +427,7 @@ CV_IMPL void cvSetTrackbarPos(const char* trackbar_name, const char* window_name

 CV_IMPL void cvSetTrackbarMax(const char* trackbar_name, const char* window_name, int maxval)
 {
-    CV_FUNCNAME("cvSetTrackbarPos");
+    CV_FUNCNAME("cvSetTrackbarMax");

    CVWindow *window = nil;
    CVSlider *slider = nil;
@ -445,6 +446,8 @@ CV_IMPL void cvSetTrackbarMax(const char* trackbar_name, const char* window_name
        slider = [[window sliders] valueForKey:[NSString stringWithFormat:@"%s", trackbar_name]];
        if(slider) {
            if(maxval >= 0) {
+                int minval = [[slider slider] minValue];
+                maxval = (minval>maxval)?minval:maxval;
                [[slider slider] setMaxValue:maxval];
            }
        }
@ -454,6 +457,37 @@ CV_IMPL void cvSetTrackbarMax(const char* trackbar_name, const char* window_name
    __END__;
 }

+CV_IMPL void cvSetTrackbarMin(const char* trackbar_name, const char* window_name, int minval)
+{
+    CV_FUNCNAME("cvSetTrackbarMin");
+
+    CVWindow *window = nil;
+    CVSlider *slider = nil;
+    NSAutoreleasePool* localpool5 = nil;
+
+    __BEGIN__;
+    if(trackbar_name == NULL || window_name == NULL)
+        CV_ERROR( CV_StsNullPtr, "NULL trackbar or window name" );
+
+    if (localpool5 != nil) [localpool5 drain];
+    localpool5 = [[NSAutoreleasePool alloc] init];
+
+    window = cvGetWindow(window_name);
+    if(window) {
+        slider = [[window sliders] valueForKey:[NSString stringWithFormat:@"%s", trackbar_name]];
+        if(slider) {
+            if(minval >= 0) {
+                int maxval = [[slider slider] maxValue];
+                minval = (minval<maxval)?minval:maxval;
+                [[slider slider] setMinValue:minval];
+            }
+        }
+    }
+    [localpool5 drain];
+
+    __END__;
+}
+
 CV_IMPL void* cvGetWindowHandle( const char* name )
 {
    //cout << "cvGetWindowHandle" << endl;
--- a/modules/highgui/src/window_gtk.cpp
+++ b/modules/highgui/src/window_gtk.cpp
@ -246,7 +246,7 @@ cvImageWidget_get_preferred_width (GtkWidget *widget, gint *minimal_width, gint
  CvImageWidget * image_widget = CV_IMAGE_WIDGET( widget );

  if(image_widget->original_image != NULL) {
-    *minimal_width = image_widget->flags & CV_WINDOW_AUTOSIZE ?
+    *minimal_width = (image_widget->flags & CV_WINDOW_AUTOSIZE) != CV_WINDOW_AUTOSIZE ?
      gdk_window_get_width(gtk_widget_get_window(widget)) : image_widget->original_image->cols;
  }
  else {
@ -270,7 +270,7 @@ cvImageWidget_get_preferred_height (GtkWidget *widget, gint *minimal_height, gin
  CvImageWidget * image_widget = CV_IMAGE_WIDGET( widget );

  if(image_widget->original_image != NULL) {
-    *minimal_height = image_widget->flags & CV_WINDOW_AUTOSIZE ?
+    *minimal_height = (image_widget->flags & CV_WINDOW_AUTOSIZE) != CV_WINDOW_AUTOSIZE ?
      gdk_window_get_height(gtk_widget_get_window(widget)) : image_widget->original_image->rows;
  }
  else {
@ -508,6 +508,7 @@ typedef struct CvTrackbar
    int* data;
    int pos;
    int maxval;
+    int minval;
    CvTrackbarCallback notify;
    CvTrackbarCallback2 notify2;
    void* userdata;
@ -1607,7 +1608,7 @@ CV_IMPL void cvSetTrackbarMax(const char* trackbar_name, const char* window_name
            trackbar = icvFindTrackbarByName(window, trackbar_name);
            if (trackbar)
            {
-                trackbar->maxval = maxval;
+                trackbar->maxval = (trackbar->minval>maxval)?trackbar->minval:maxval;

                CV_LOCK_MUTEX();

@ -1622,6 +1623,43 @@ CV_IMPL void cvSetTrackbarMax(const char* trackbar_name, const char* window_name
 }


+CV_IMPL void cvSetTrackbarMin(const char* trackbar_name, const char* window_name, int minval)
+{
+    CV_FUNCNAME("cvSetTrackbarMin");
+
+    __BEGIN__;
+    // Sets the lower bound of the named trackbar; negative values are ignored.
+    if (minval >= 0)
+    {
+        CvWindow* window = 0;
+        CvTrackbar* trackbar = 0;
+
+        if (trackbar_name == 0 || window_name == 0)
+        {
+            CV_ERROR( CV_StsNullPtr, "NULL trackbar or window name");
+        }
+
+        window = icvFindWindowByName( window_name );
+        if (window)
+        {
+            trackbar = icvFindTrackbarByName(window, trackbar_name);
+            if (trackbar)
+            {
+                trackbar->minval = (minval<trackbar->maxval)?minval:trackbar->maxval;
+                // Hand the clamped value to GTK so the widget never gets min > max.
+                CV_LOCK_MUTEX();
+
+                gtk_range_set_range(GTK_RANGE(trackbar->widget), trackbar->minval, trackbar->maxval);
+
+                CV_UNLOCK_MUTEX();
+            }
+        }
+    }
+
+    __END__;
+}
+
+
 CV_IMPL void* cvGetWindowHandle( const char* window_name )
 {
    void* widget = 0;
--- a/modules/highgui/src/window_w32.cpp
+++ b/modules/highgui/src/window_w32.cpp
@ -138,6 +138,7 @@ typedef struct CvTrackbar
    int* data;
    int pos;
    int maxval;
+    int minval;
    void (*notify)(int);
    void (*notify2)(int, void*);
    void* userdata;
@ -1909,7 +1910,8 @@ static void showSaveDialog(CvWindow* window)

    if (GetSaveFileName(&ofn))
    {
-        cv::Mat tmp; cv::flip(cv::Mat(sz.cy, sz.cx, CV_8UC(channels), data), tmp, 0);
+        cv::Mat tmp;
+        cv::flip(cv::Mat(sz.cy, sz.cx, CV_8UC(channels), data, (sz.cx * channels + 3) & -4), tmp, 0);
        cv::imwrite(szFileName, tmp);
    }
 }
@ -2324,7 +2326,7 @@ CV_IMPL void cvSetTrackbarMax(const char* trackbar_name, const char* window_name
            if (trackbar)
            {
                // The position will be min(pos, maxval).
-                trackbar->maxval = maxval;
+                trackbar->maxval = (trackbar->minval>maxval)?trackbar->minval:maxval;
                SendMessage(trackbar->hwnd, TBM_SETRANGEMAX, (WPARAM)TRUE, (LPARAM)maxval);
            }
        }
@ -2334,6 +2336,38 @@ CV_IMPL void cvSetTrackbarMax(const char* trackbar_name, const char* window_name
 }


+CV_IMPL void cvSetTrackbarMin(const char* trackbar_name, const char* window_name, int minval)
+{
+    CV_FUNCNAME( "cvSetTrackbarMin" );
+
+    __BEGIN__;
+    // Sets the lower bound of the named trackbar; negative values are ignored.
+    if (minval >= 0)
+    {
+        CvWindow* window = 0;
+        CvTrackbar* trackbar = 0;
+        if (trackbar_name == 0 || window_name == 0)
+        {
+            CV_ERROR(CV_StsNullPtr, "NULL trackbar or window name");
+        }
+
+        window = icvFindWindowByName(window_name);
+        if (window)
+        {
+            trackbar = icvFindTrackbarByName(window, trackbar_name);
+            if (trackbar)
+            {
+                // Clamp to the current maximum and send the clamped value to the control.
+                trackbar->minval = (minval<trackbar->maxval)?minval:trackbar->maxval;
+                SendMessage(trackbar->hwnd, TBM_SETRANGEMIN, (WPARAM)TRUE, (LPARAM)trackbar->minval);
+            }
+        }
+    }
+
+    __END__;
+}
+
+
 CV_IMPL void* cvGetWindowHandle( const char* window_name )
 {
    void* hwnd = 0;
--- a/modules/highgui/src/window_winrt.cpp
+++ b/modules/highgui/src/window_winrt.cpp
@ -170,6 +170,22 @@ CV_IMPL void cvSetTrackbarMax(const char* trackbar_name, const char* window_name
    }
 }

+CV_IMPL void cvSetTrackbarMin(const char* trackbar_name, const char* window_name, int minval)
+{
+    CV_FUNCNAME("cvSetTrackbarMin");
+
+    if (minval >= 0)
+    {
+        if (trackbar_name == 0 || window_name == 0)
+            CV_ERROR(CV_StsNullPtr, "NULL trackbar or window name");
+
+        CvTrackbar* trackbar = HighguiBridge::getInstance().findTrackbarByName(trackbar_name, window_name);
+
+        if (trackbar)
+            trackbar->setMinPosition(minval);
+    }
+}
+
 CV_IMPL int cvGetTrackbarPos(const char* trackbar_name, const char* window_name)
 {
    int pos = -1;
--- a/Show More
+++ b/Show More