ICV2017u3 package update;

- Optimization set change: IPP integrations now provide code for SSE42, AVX2 and AVX512 (SKX) CPUs only. On hardware below SSE42, IPP code is disabled.
- Performance regression fixes for IPP code paths;
- cv::boxFilter integration improvement;
- cv::filter2D integration improvement;
pull/9395/head
Pavel Vlasov 7 years ago
parent 87c27a074d
commit a57718e1ac
  1. 7
      3rdparty/ippicv/CMakeLists.txt
  2. 26
      3rdparty/ippicv/ippicv.cmake
  3. 12
      CMakeLists.txt
  4. 32
      cmake/OpenCVFindIPP.cmake
  5. 3
      cmake/OpenCVFindIPPIW.cmake
  6. 2
      cmake/templates/cvconfig.h.in
  7. 10
      modules/core/include/opencv2/core/base.hpp
  8. 134
      modules/core/include/opencv2/core/private.hpp
  9. 77
      modules/core/src/convert.cpp
  10. 34
      modules/core/src/copy.cpp
  11. 8
      modules/core/src/mathfuncs_core.dispatch.cpp
  12. 2
      modules/core/src/matmul.cpp
  13. 5
      modules/core/src/matrix.cpp
  14. 6
      modules/core/src/precomp.hpp
  15. 29
      modules/core/src/stat.cpp
  16. 188
      modules/core/src/system.cpp
  17. 4
      modules/imgproc/src/canny.cpp
  18. 85
      modules/imgproc/src/color.cpp
  19. 18
      modules/imgproc/src/deriv.cpp
  20. 246
      modules/imgproc/src/filter.cpp
  21. 12
      modules/imgproc/src/filterengine.hpp
  22. 6
      modules/imgproc/src/histogram.cpp
  23. 41
      modules/imgproc/src/imgwarp.cpp
  24. 6
      modules/imgproc/src/moments.cpp
  25. 141
      modules/imgproc/src/morph.cpp
  26. 134
      modules/imgproc/src/smooth.cpp
  27. 17
      modules/imgproc/src/thresh.cpp
  28. 2
      modules/objdetect/src/haar.cpp
  29. 2
      modules/ts/include/opencv2/ts/ts_perf.hpp
  30. 10
      modules/ts/src/ts_func.cpp

@ -6,7 +6,7 @@ project(${IPP_IW_LIBRARY})
ocv_include_directories(${IPP_INCLUDE_DIRS} ${IPP_IW_PATH}/include) ocv_include_directories(${IPP_INCLUDE_DIRS} ${IPP_IW_PATH}/include)
add_definitions(-DIW_BUILD) add_definitions(-DIW_BUILD)
if(HAVE_IPP_ICV_ONLY) if(HAVE_IPP_ICV)
add_definitions(-DICV_BASE) add_definitions(-DICV_BASE)
endif() endif()
@ -21,7 +21,10 @@ add_library(${IPP_IW_LIBRARY} STATIC ${lib_srcs} ${lib_hdrs})
if(UNIX) if(UNIX)
if(CMAKE_COMPILER_IS_GNUCXX OR CV_ICC) if(CMAKE_COMPILER_IS_GNUCXX OR CV_ICC)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -Wno-unused-function") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -Wno-unused-function -Wno-missing-braces -Wno-missing-field-initializers")
endif()
if (CMAKE_C_COMPILER_ID MATCHES "Clang")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-self-assign")
endif() endif()
endif() endif()

@ -2,37 +2,37 @@ function(download_ippicv root_var)
set(${root_var} "" PARENT_SCOPE) set(${root_var} "" PARENT_SCOPE)
# Commit SHA in the opencv_3rdparty repo # Commit SHA in the opencv_3rdparty repo
set(IPPICV_COMMIT "a62e20676a60ee0ad6581e217fe7e4bada3b95db") set(IPPICV_COMMIT "dfe3162c237af211e98b8960018b564bc209261d")
# Define actual ICV versions # Define actual ICV versions
if(APPLE) if(APPLE)
set(OPENCV_ICV_PLATFORM "macosx") set(OPENCV_ICV_PLATFORM "macosx")
set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_mac") set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_mac")
if(X86_64) if(X86_64)
set(OPENCV_ICV_NAME "ippicv_2017u2_mac_intel64_20170418.tgz") set(OPENCV_ICV_NAME "ippicv_2017u3_mac_intel64_general_20170822.tgz")
set(OPENCV_ICV_HASH "0c25953c99dbb499ff502485a9356d8d") set(OPENCV_ICV_HASH "c1ebb5dfa5b7f54b0c44e1917805a463")
else() else()
set(OPENCV_ICV_NAME "ippicv_2017u2_mac_ia32_20170418.tgz") set(OPENCV_ICV_NAME "ippicv_2017u3_mac_ia32_general_20170822.tgz")
set(OPENCV_ICV_HASH "5f225948f3f64067c681293c098d50d8") set(OPENCV_ICV_HASH "49b05a669042753ae75895a445ebd612")
endif() endif()
elseif((UNIX AND NOT ANDROID) OR (UNIX AND ANDROID_ABI MATCHES "x86")) elseif((UNIX AND NOT ANDROID) OR (UNIX AND ANDROID_ABI MATCHES "x86"))
set(OPENCV_ICV_PLATFORM "linux") set(OPENCV_ICV_PLATFORM "linux")
set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_lnx") set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_lnx")
if(X86_64) if(X86_64)
set(OPENCV_ICV_NAME "ippicv_2017u2_lnx_intel64_20170418.tgz") set(OPENCV_ICV_NAME "ippicv_2017u3_lnx_intel64_general_20170822.tgz")
set(OPENCV_ICV_HASH "87cbdeb627415d8e4bc811156289fa3a") set(OPENCV_ICV_HASH "4e0352ce96473837b1d671ce87f17359")
else() else()
set(OPENCV_ICV_NAME "ippicv_2017u2_lnx_ia32_20170418.tgz") set(OPENCV_ICV_NAME "ippicv_2017u3_lnx_ia32_general_20170822.tgz")
set(OPENCV_ICV_HASH "f2cece00d802d4dea86df52ed095257e") set(OPENCV_ICV_HASH "dcdb0ba4b123f240596db1840cd59a76")
endif() endif()
elseif(WIN32 AND NOT ARM) elseif(WIN32 AND NOT ARM)
set(OPENCV_ICV_PLATFORM "windows") set(OPENCV_ICV_PLATFORM "windows")
set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_win") set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_win")
if(X86_64) if(X86_64)
set(OPENCV_ICV_NAME "ippicv_2017u2_win_intel64_20170418.zip") set(OPENCV_ICV_NAME "ippicv_2017u3_win_intel64_general_20170822.zip")
set(OPENCV_ICV_HASH "75060a0c662c0800f48995b7e9b085f6") set(OPENCV_ICV_HASH "0421e642bc7ad741a2236d3ec4190bdd")
else() else()
set(OPENCV_ICV_NAME "ippicv_2017u2_win_ia32_20170418.zip") set(OPENCV_ICV_NAME "ippicv_2017u3_win_ia32_general_20170822.zip")
set(OPENCV_ICV_HASH "60fcf3ccd9a2ebc9e432ffb5cb91638b") set(OPENCV_ICV_HASH "8a7680ae352c192de2e2e34936164bd0")
endif() endif()
else() else()
return() return()

@ -255,7 +255,6 @@ OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" ON
OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON IF (NOT ANDROID AND NOT IOS AND NOT WINRT) ) OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON IF (NOT ANDROID AND NOT IOS AND NOT WINRT) )
OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON IF (WIN32 AND NOT WINRT) ) OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON IF (WIN32 AND NOT WINRT) )
OCV_OPTION(WITH_INTELPERC "Include Intel Perceptual Computing support" OFF IF (WIN32 AND NOT WINRT) ) OCV_OPTION(WITH_INTELPERC "Include Intel Perceptual Computing support" OFF IF (WIN32 AND NOT WINRT) )
OCV_OPTION(WITH_IPP_A "Include Intel IPP_A support" OFF IF (MSVC OR X86 OR X86_64) )
OCV_OPTION(WITH_MATLAB "Include Matlab support" ON IF (NOT ANDROID AND NOT IOS AND NOT WINRT)) OCV_OPTION(WITH_MATLAB "Include Matlab support" ON IF (NOT ANDROID AND NOT IOS AND NOT WINRT))
OCV_OPTION(WITH_VA "Include VA support" OFF IF (UNIX AND NOT ANDROID) ) OCV_OPTION(WITH_VA "Include VA support" OFF IF (UNIX AND NOT ANDROID) )
OCV_OPTION(WITH_VA_INTEL "Include Intel VA-API/OpenCL support" OFF IF (UNIX AND NOT ANDROID) ) OCV_OPTION(WITH_VA_INTEL "Include Intel VA-API/OpenCL support" OFF IF (UNIX AND NOT ANDROID) )
@ -1311,15 +1310,16 @@ status(" Other third-party libraries:")
if(WITH_IPP AND HAVE_IPP) if(WITH_IPP AND HAVE_IPP)
status(" Use Intel IPP:" "${IPP_VERSION_STR} [${IPP_VERSION_MAJOR}.${IPP_VERSION_MINOR}.${IPP_VERSION_BUILD}]") status(" Use Intel IPP:" "${IPP_VERSION_STR} [${IPP_VERSION_MAJOR}.${IPP_VERSION_MINOR}.${IPP_VERSION_BUILD}]")
status(" at:" "${IPP_ROOT_DIR}") status(" at:" "${IPP_ROOT_DIR}")
if(NOT HAVE_IPP_ICV_ONLY) if(NOT HAVE_IPP_ICV)
status(" linked:" BUILD_WITH_DYNAMIC_IPP THEN "dynamic" ELSE "static") status(" linked:" BUILD_WITH_DYNAMIC_IPP THEN "dynamic" ELSE "static")
endif() endif()
if(HAVE_IPP_IW) if(HAVE_IPP_IW)
if(BUILD_IPP_IW) if(BUILD_IPP_IW)
status(" Use Intel IPP IW:" "build (${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE})") status(" Use Intel IPP IW:" "sources (${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE})")
else() else()
status(" Use Intel IPP IW:" "prebuilt binaries (${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE})") status(" Use Intel IPP IW:" "binaries (${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE})")
endif() endif()
status(" at:" "${IPP_IW_PATH}")
else() else()
status(" Use Intel IPP IW:" NO) status(" Use Intel IPP IW:" NO)
endif() endif()
@ -1328,10 +1328,6 @@ else()
status(" Use Intel IPP IW:" WITH_IPP AND NOT HAVE_IPP AND HAVE_IPP_IW THEN "IPP not found or implicitly disabled" ELSE NO) status(" Use Intel IPP IW:" WITH_IPP AND NOT HAVE_IPP AND HAVE_IPP_IW THEN "IPP not found or implicitly disabled" ELSE NO)
endif() endif()
if(DEFINED WITH_IPP_A)
status(" Use Intel IPP Async:" HAVE_IPP_A THEN "YES" ELSE NO)
endif(DEFINED WITH_IPP_A)
if(DEFINED WITH_VA) if(DEFINED WITH_VA)
status(" Use VA:" HAVE_VA THEN "YES" ELSE NO) status(" Use VA:" HAVE_VA THEN "YES" ELSE NO)
endif(DEFINED WITH_VA) endif(DEFINED WITH_VA)

@ -11,13 +11,13 @@
# #
# On return this will define: # On return this will define:
# #
# HAVE_IPP - True if Intel IPP found # HAVE_IPP - True if Intel IPP found
# HAVE_IPP_ICV_ONLY - True if Intel IPP ICV version is available # HAVE_IPP_ICV - True if Intel IPP ICV version is available
# IPP_ROOT_DIR - root of IPP installation # IPP_ROOT_DIR - root of IPP installation
# IPP_INCLUDE_DIRS - IPP include folder # IPP_INCLUDE_DIRS - IPP include folder
# IPP_LIBRARIES - IPP libraries that are used by OpenCV # IPP_LIBRARIES - IPP libraries that are used by OpenCV
# IPP_VERSION_STR - string with the newest detected IPP version # IPP_VERSION_STR - string with the newest detected IPP version
# IPP_VERSION_MAJOR - numbers of IPP version (MAJOR.MINOR.BUILD) # IPP_VERSION_MAJOR - numbers of IPP version (MAJOR.MINOR.BUILD)
# IPP_VERSION_MINOR # IPP_VERSION_MINOR
# IPP_VERSION_BUILD # IPP_VERSION_BUILD
# #
@ -25,7 +25,7 @@
# #
unset(HAVE_IPP CACHE) unset(HAVE_IPP CACHE)
unset(HAVE_IPP_ICV_ONLY) unset(HAVE_IPP_ICV)
unset(IPP_ROOT_DIR) unset(IPP_ROOT_DIR)
unset(IPP_INCLUDE_DIRS) unset(IPP_INCLUDE_DIRS)
unset(IPP_LIBRARIES) unset(IPP_LIBRARIES)
@ -79,7 +79,7 @@ endmacro()
macro(_ipp_not_supported) macro(_ipp_not_supported)
message(STATUS ${ARGN}) message(STATUS ${ARGN})
unset(HAVE_IPP) unset(HAVE_IPP)
unset(HAVE_IPP_ICV_ONLY) unset(HAVE_IPP_ICV)
unset(IPP_VERSION_STR) unset(IPP_VERSION_STR)
return() return()
endmacro() endmacro()
@ -92,7 +92,7 @@ macro(ipp_detect_version)
set(__msg) set(__msg)
if(EXISTS ${IPP_ROOT_DIR}/include/ippicv_redefs.h) if(EXISTS ${IPP_ROOT_DIR}/include/ippicv_redefs.h)
set(__msg " (ICV version)") set(__msg " (ICV version)")
set(HAVE_IPP_ICV_ONLY 1) set(HAVE_IPP_ICV 1)
elseif(EXISTS ${IPP_ROOT_DIR}/include/ipp.h) elseif(EXISTS ${IPP_ROOT_DIR}/include/ipp.h)
# nothing # nothing
else() else()
@ -118,7 +118,7 @@ macro(ipp_detect_version)
set(IPP_LIBRARY_DIR ${DIR}) set(IPP_LIBRARY_DIR ${DIR})
endmacro() endmacro()
if(APPLE AND NOT HAVE_IPP_ICV_ONLY) if(APPLE AND NOT HAVE_IPP_ICV)
_ipp_set_library_dir(${IPP_ROOT_DIR}/lib) _ipp_set_library_dir(${IPP_ROOT_DIR}/lib)
elseif(IPP_X64) elseif(IPP_X64)
_ipp_set_library_dir(${IPP_ROOT_DIR}/lib/intel64) _ipp_set_library_dir(${IPP_ROOT_DIR}/lib/intel64)
@ -128,7 +128,7 @@ macro(ipp_detect_version)
macro(_ipp_add_library name) macro(_ipp_add_library name)
# dynamic linking is only supported for standalone version of Intel IPP # dynamic linking is only supported for standalone version of Intel IPP
if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV_ONLY) if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV)
if (WIN32) if (WIN32)
set(IPP_LIB_PREFIX ${CMAKE_IMPORT_LIBRARY_PREFIX}) set(IPP_LIB_PREFIX ${CMAKE_IMPORT_LIBRARY_PREFIX})
set(IPP_LIB_SUFFIX ${CMAKE_IMPORT_LIBRARY_SUFFIX}) set(IPP_LIB_SUFFIX ${CMAKE_IMPORT_LIBRARY_SUFFIX})
@ -141,7 +141,7 @@ macro(ipp_detect_version)
set(IPP_LIB_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX}) set(IPP_LIB_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
endif () endif ()
if (EXISTS ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${name}${IPP_SUFFIX}${IPP_LIB_SUFFIX}) if (EXISTS ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${name}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV_ONLY) if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV)
# When using dynamic libraries from standalone Intel IPP it is your responsibility to install those on the target system # When using dynamic libraries from standalone Intel IPP it is your responsibility to install those on the target system
list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${name}${IPP_SUFFIX}${IPP_LIB_SUFFIX}) list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${name}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
else () else ()
@ -167,14 +167,14 @@ macro(ipp_detect_version)
set(IPP_PREFIX "ipp") set(IPP_PREFIX "ipp")
if(${IPP_VERSION_STR} VERSION_LESS "8.0") if(${IPP_VERSION_STR} VERSION_LESS "8.0")
if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV_ONLY) if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV)
set(IPP_SUFFIX "") # dynamic not threaded libs suffix Intel IPP 7.x set(IPP_SUFFIX "") # dynamic not threaded libs suffix Intel IPP 7.x
else () else ()
set(IPP_SUFFIX "_l") # static not threaded libs suffix Intel IPP 7.x set(IPP_SUFFIX "_l") # static not threaded libs suffix Intel IPP 7.x
endif () endif ()
else () else ()
if(WIN32) if(WIN32)
if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV_ONLY) if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV)
set(IPP_SUFFIX "") # dynamic not threaded libs suffix Intel IPP 8.x for Windows set(IPP_SUFFIX "") # dynamic not threaded libs suffix Intel IPP 8.x for Windows
else () else ()
set(IPP_SUFFIX "mt") # static not threaded libs suffix Intel IPP 8.x for Windows set(IPP_SUFFIX "mt") # static not threaded libs suffix Intel IPP 8.x for Windows
@ -184,7 +184,7 @@ macro(ipp_detect_version)
endif() endif()
endif() endif()
if(HAVE_IPP_ICV_ONLY) if(HAVE_IPP_ICV)
_ipp_add_library(icv) _ipp_add_library(icv)
else() else()
_ipp_add_library(cv) _ipp_add_library(cv)

@ -27,6 +27,7 @@ macro(ippiw_debugmsg MESSAGE)
message(STATUS "${MESSAGE}") message(STATUS "${MESSAGE}")
endif() endif()
endmacro() endmacro()
file(TO_CMAKE_PATH "${IPPROOT}" IPPROOT)
# This function detects Intel IPP IW version by analyzing .h file # This function detects Intel IPP IW version by analyzing .h file
macro(ippiw_setup PATH BUILD) macro(ippiw_setup PATH BUILD)
@ -153,7 +154,7 @@ ippiw_setup("${IPPROOT}/../${IW_PACKAGE_SUBDIR}/" 0)
# take Intel IPP IW from ICV package # take Intel IPP IW from ICV package
if(NOT HAVE_IPP_ICV_ONLY AND BUILD_IPP_IW) if(NOT HAVE_IPP_ICV AND BUILD_IPP_IW)
message(STATUS "Cannot find Intel IPP IW. Checking \"Intel IPP for OpenCV\" package") message(STATUS "Cannot find Intel IPP IW. Checking \"Intel IPP for OpenCV\" package")
set(TEMP_ROOT 0) set(TEMP_ROOT 0)
include("${OpenCV_SOURCE_DIR}/3rdparty/ippicv/ippicv.cmake") include("${OpenCV_SOURCE_DIR}/3rdparty/ippicv/ippicv.cmake")

@ -103,7 +103,7 @@
/* Intel Integrated Performance Primitives */ /* Intel Integrated Performance Primitives */
#cmakedefine HAVE_IPP #cmakedefine HAVE_IPP
#cmakedefine HAVE_IPP_ICV_ONLY #cmakedefine HAVE_IPP_ICV
#cmakedefine HAVE_IPP_IW #cmakedefine HAVE_IPP_IW
/* Intel IPP Async */ /* Intel IPP Async */

@ -693,8 +693,14 @@ CV_EXPORTS void setIppStatus(int status, const char * const funcname = NULL, c
int line = 0); int line = 0);
CV_EXPORTS int getIppStatus(); CV_EXPORTS int getIppStatus();
CV_EXPORTS String getIppErrorLocation(); CV_EXPORTS String getIppErrorLocation();
CV_EXPORTS_W bool useIPP(); CV_EXPORTS_W bool useIPP();
CV_EXPORTS_W void setUseIPP(bool flag); CV_EXPORTS_W void setUseIPP(bool flag);
CV_EXPORTS_W String getIppVersion();
// IPP Not-Exact mode. This function may force use of IPP when both IPP and OpenCV provide proper results
// but have internal accuracy differences which have too much direct or indirect impact on accuracy tests.
CV_EXPORTS_W bool useIPP_NE();
CV_EXPORTS_W void setUseIPP_NE(bool flag);
} // ipp } // ipp

@ -194,8 +194,6 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un
#define IPP_DISABLE_WARPAFFINE 1 // Different results #define IPP_DISABLE_WARPAFFINE 1 // Different results
#define IPP_DISABLE_WARPPERSPECTIVE 1 // Different results #define IPP_DISABLE_WARPPERSPECTIVE 1 // Different results
#define IPP_DISABLE_REMAP 1 // Different results #define IPP_DISABLE_REMAP 1 // Different results
#define IPP_DISABLE_MORPH_ADV 1 // mask flipping in IPP
#define IPP_DISABLE_SORT_IDX 0 // different order in index tables
#define IPP_DISABLE_YUV_RGB 1 // accuracy difference #define IPP_DISABLE_YUV_RGB 1 // accuracy difference
#define IPP_DISABLE_RGB_YUV 1 // breaks OCL accuracy tests #define IPP_DISABLE_RGB_YUV 1 // breaks OCL accuracy tests
#define IPP_DISABLE_RGB_HSV 1 // breaks OCL accuracy tests #define IPP_DISABLE_RGB_HSV 1 // breaks OCL accuracy tests
@ -205,21 +203,12 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un
#define IPP_DISABLE_XYZ_RGB 1 // big accuracy difference #define IPP_DISABLE_XYZ_RGB 1 // big accuracy difference
#define IPP_DISABLE_HAAR 1 // improper integration/results #define IPP_DISABLE_HAAR 1 // improper integration/results
#define IPP_DISABLE_HOUGH 1 // improper integration/results #define IPP_DISABLE_HOUGH 1 // improper integration/results
#define IPP_DISABLE_RESIZE_8U 1 // Incompatible accuracy
#define IPP_DISABLE_RESIZE_NEAREST 1 // Accuracy mismatch (max diff 1)
#define IPP_DISABLE_RESIZE_AREA 1 // Accuracy mismatch (max diff 1)
#define IPP_DISABLE_MINMAX_NAN_SSE42 1 // cv::minMaxIdx problem with NaN input
// Temporary disabled named IPP region. Performance // Temporary disabled named IPP region. Performance
#define IPP_DISABLE_PERF_COPYMAKE 1 // performance variations #define IPP_DISABLE_PERF_COPYMAKE 1 // performance variations
#define IPP_DISABLE_PERF_LUT 1 // there are no performance benefits (PR #2653) #define IPP_DISABLE_PERF_LUT 1 // there are no performance benefits (PR #2653)
#define IPP_DISABLE_PERF_TRUE_DIST_MT 1 // cv::distanceTransform OpenCV MT performance is better #define IPP_DISABLE_PERF_TRUE_DIST_MT 1 // cv::distanceTransform OpenCV MT performance is better
#define IPP_DISABLE_PERF_CANNY_MT 1 // cv::Canny OpenCV MT performance is better #define IPP_DISABLE_PERF_CANNY_MT 1 // cv::Canny OpenCV MT performance is better
#define IPP_DISABLE_PERF_HISTU32F_SSE42 1 // cv::calcHist optimizations problem
#define IPP_DISABLE_PERF_MORPH_SSE42 1 // cv::erode, cv::dilate optimizations problem
#define IPP_DISABLE_PERF_MAG_SSE42 1 // cv::magnitude optimizations problem
#define IPP_DISABLE_PERF_BOX16S_SSE42 1 // cv::boxFilter optimizations problem
#ifdef HAVE_IPP #ifdef HAVE_IPP
#include "ippversion.h" #include "ippversion.h"
@ -229,7 +218,7 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un
#define IPP_VERSION_X100 (IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR*10 + IPP_VERSION_UPDATE) #define IPP_VERSION_X100 (IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR*10 + IPP_VERSION_UPDATE)
#ifdef HAVE_IPP_ICV_ONLY #ifdef HAVE_IPP_ICV
#define ICV_BASE #define ICV_BASE
#if IPP_VERSION_X100 >= 201700 #if IPP_VERSION_X100 >= 201700
#include "ippicv.h" #include "ippicv.h"
@ -241,6 +230,7 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un
#endif #endif
#ifdef HAVE_IPP_IW #ifdef HAVE_IPP_IW
#include "iw++/iw.hpp" #include "iw++/iw.hpp"
#include "iw/iw_ll.h"
#endif #endif
#if IPP_VERSION_X100 >= 201700 #if IPP_VERSION_X100 >= 201700
@ -251,6 +241,17 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un
#define setIppErrorStatus() cv::ipp::setIppStatus(-1, CV_Func, __FILE__, __LINE__) #define setIppErrorStatus() cv::ipp::setIppStatus(-1, CV_Func, __FILE__, __LINE__)
#define ippCPUID_AVX512_SKX (ippCPUID_AVX512F|ippCPUID_AVX512CD|ippCPUID_AVX512VL|ippCPUID_AVX512BW|ippCPUID_AVX512DQ)
#define ippCPUID_AVX512_KNL (ippCPUID_AVX512F|ippCPUID_AVX512CD|ippCPUID_AVX512PF|ippCPUID_AVX512ER)
namespace cv
{
namespace ipp
{
CV_EXPORTS unsigned long long getIppTopFeatures(); // Returns top major enabled IPP feature flag
}
}
static inline IppiSize ippiSize(size_t width, size_t height) static inline IppiSize ippiSize(size_t width, size_t height)
{ {
IppiSize size = { (int)width, (int)height }; IppiSize size = { (int)width, (int)height };
@ -322,7 +323,43 @@ static inline IppDataType ippiGetDataType(int depth)
(IppDataType)-1; (IppDataType)-1;
} }
static inline int ippiSuggestThreadsNum(size_t width, size_t height, size_t elemSize, double multiplier)
{
int threads = cv::getNumThreads();
if(threads > 1 && height >= 64)
{
size_t opMemory = (int)(width*height*elemSize*multiplier);
int l2cache = 0;
#if IPP_VERSION_X100 >= 201700
ippGetL2CacheSize(&l2cache);
#endif
if(!l2cache)
l2cache = 1 << 18;
return IPP_MAX(1, (IPP_MIN((int)(opMemory/l2cache), threads)));
}
return 1;
}
static inline int ippiSuggestThreadsNum(const cv::Mat &image, double multiplier)
{
return ippiSuggestThreadsNum(image.cols, image.rows, image.elemSize(), multiplier);
}
#ifdef HAVE_IPP_IW #ifdef HAVE_IPP_IW
static inline bool ippiCheckAnchor(int x, int y, int kernelWidth, int kernelHeight)
{
if(x != ((kernelWidth-1)/2) || y != ((kernelHeight-1)/2))
return 0;
else
return 1;
}
static inline ::ipp::IwiSize ippiGetSize(const cv::Size & size)
{
return ::ipp::IwiSize((IwSize)size.width, (IwSize)size.height);
}
static inline IwiDerivativeType ippiGetDerivType(int dx, int dy, bool nvert) static inline IwiDerivativeType ippiGetDerivType(int dx, int dy, bool nvert)
{ {
return (dx == 1 && dy == 0) ? ((nvert)?iwiDerivNVerFirst:iwiDerivVerFirst) : return (dx == 1 && dy == 0) ? ((nvert)?iwiDerivNVerFirst:iwiDerivVerFirst) :
@ -341,10 +378,10 @@ static inline void ippiGetImage(const cv::Mat &src, ::ipp::IwiImage &dst)
cv::Point offset; cv::Point offset;
src.locateROI(origSize, offset); src.locateROI(origSize, offset);
inMemBorder.borderLeft = (Ipp32u)offset.x; inMemBorder.left = (IwSize)offset.x;
inMemBorder.borderTop = (Ipp32u)offset.y; inMemBorder.top = (IwSize)offset.y;
inMemBorder.borderRight = (Ipp32u)(origSize.width - src.cols - offset.x); inMemBorder.right = (IwSize)(origSize.width - src.cols - offset.x);
inMemBorder.borderBottom = (Ipp32u)(origSize.height - src.rows - offset.y); inMemBorder.bottom = (IwSize)(origSize.height - src.rows - offset.y);
} }
dst.Init(ippiSize(src.size()), ippiGetDataType(src.depth()), src.channels(), inMemBorder, (void*)src.ptr(), src.step); dst.Init(ippiSize(src.size()), ippiGetDataType(src.depth()), src.channels(), inMemBorder, (void*)src.ptr(), src.step);
@ -357,7 +394,7 @@ static inline ::ipp::IwiImage ippiGetImage(const cv::Mat &src)
return image; return image;
} }
static inline IppiBorderType ippiGetBorder(::ipp::IwiImage &image, int ocvBorderType, IppiBorderSize &borderSize) static inline IppiBorderType ippiGetBorder(::ipp::IwiImage &image, int ocvBorderType, ipp::IwiBorderSize &borderSize)
{ {
int inMemFlags = 0; int inMemFlags = 0;
IppiBorderType border = ippiGetBorderType(ocvBorderType & ~cv::BORDER_ISOLATED); IppiBorderType border = ippiGetBorderType(ocvBorderType & ~cv::BORDER_ISOLATED);
@ -366,91 +403,60 @@ static inline IppiBorderType ippiGetBorder(::ipp::IwiImage &image, int ocvBorder
if(!(ocvBorderType & cv::BORDER_ISOLATED)) if(!(ocvBorderType & cv::BORDER_ISOLATED))
{ {
if(image.m_inMemSize.borderLeft) if(image.m_inMemSize.left)
{ {
if(image.m_inMemSize.borderLeft >= borderSize.borderLeft) if(image.m_inMemSize.left >= borderSize.left)
inMemFlags |= ippBorderInMemLeft; inMemFlags |= ippBorderInMemLeft;
else else
return (IppiBorderType)0; return (IppiBorderType)0;
} }
else else
borderSize.borderLeft = 0; borderSize.left = 0;
if(image.m_inMemSize.borderTop) if(image.m_inMemSize.top)
{ {
if(image.m_inMemSize.borderTop >= borderSize.borderTop) if(image.m_inMemSize.top >= borderSize.top)
inMemFlags |= ippBorderInMemTop; inMemFlags |= ippBorderInMemTop;
else else
return (IppiBorderType)0; return (IppiBorderType)0;
} }
else else
borderSize.borderTop = 0; borderSize.top = 0;
if(image.m_inMemSize.borderRight) if(image.m_inMemSize.right)
{ {
if(image.m_inMemSize.borderRight >= borderSize.borderRight) if(image.m_inMemSize.right >= borderSize.right)
inMemFlags |= ippBorderInMemRight; inMemFlags |= ippBorderInMemRight;
else else
return (IppiBorderType)0; return (IppiBorderType)0;
} }
else else
borderSize.borderRight = 0; borderSize.right = 0;
if(image.m_inMemSize.borderBottom) if(image.m_inMemSize.bottom)
{ {
if(image.m_inMemSize.borderBottom >= borderSize.borderBottom) if(image.m_inMemSize.bottom >= borderSize.bottom)
inMemFlags |= ippBorderInMemBottom; inMemFlags |= ippBorderInMemBottom;
else else
return (IppiBorderType)0; return (IppiBorderType)0;
} }
else else
borderSize.borderBottom = 0; borderSize.bottom = 0;
} }
else else
borderSize.borderLeft = borderSize.borderRight = borderSize.borderTop = borderSize.borderBottom = 0; borderSize.left = borderSize.right = borderSize.top = borderSize.bottom = 0;
return (IppiBorderType)(border|inMemFlags); return (IppiBorderType)(border|inMemFlags);
} }
static inline ::ipp::IwValue ippiGetValue(const cv::Scalar &scalar) static inline ::ipp::IwValueFloat ippiGetValue(const cv::Scalar &scalar)
{ {
return ::ipp::IwValue(scalar[0], scalar[1], scalar[2], scalar[3]); return ::ipp::IwValueFloat(scalar[0], scalar[1], scalar[2], scalar[3]);
} }
static inline int ippiSuggestThreadsNum(const ::ipp::IwiImage &image, double multiplier) static inline int ippiSuggestThreadsNum(const ::ipp::IwiImage &image, double multiplier)
{ {
int threads = cv::getNumThreads(); return ippiSuggestThreadsNum(image.m_size.width, image.m_size.height, image.m_typeSize*image.m_channels, multiplier);
if(image.m_size.height > threads)
{
size_t opMemory = (int)(image.m_step*image.m_size.height*multiplier);
int l2cache = 0;
#if IPP_VERSION_X100 >= 201700
ippGetL2CacheSize(&l2cache);
#endif
if(!l2cache)
l2cache = 1 << 18;
return IPP_MAX(1, (IPP_MIN((int)(opMemory/l2cache), threads)));
}
return 1;
} }
#endif #endif
static inline int ippiSuggestThreadsNum(const cv::Mat &image, double multiplier)
{
int threads = cv::getNumThreads();
if(image.rows > threads)
{
size_t opMemory = (int)(image.total()*multiplier);
int l2cache = 0;
#if IPP_VERSION_X100 >= 201700
ippGetL2CacheSize(&l2cache);
#endif
if(!l2cache)
l2cache = 1 << 18;
return IPP_MAX(1, (IPP_MIN((int)(opMemory/l2cache), threads)));
}
return 1;
}
// IPP temporary buffer helper // IPP temporary buffer helper
template<typename T> template<typename T>
class IppAutoBuffer class IppAutoBuffer

@ -86,12 +86,6 @@ static MergeFunc getMergeFunc(int depth)
} }
#ifdef HAVE_IPP #ifdef HAVE_IPP
#ifdef HAVE_IPP_IW
extern "C" {
IW_DECL(IppStatus) llwiCopySplit(const void *pSrc, int srcStep, void* const pDstOrig[], int dstStep,
IppiSize size, int typeSize, int channels);
}
#endif
namespace cv { namespace cv {
static bool ipp_split(const Mat& src, Mat* mv, int channels) static bool ipp_split(const Mat& src, Mat* mv, int channels)
@ -114,7 +108,7 @@ static bool ipp_split(const Mat& src, Mat* mv, int channels)
return false; return false;
} }
return CV_INSTRUMENT_FUN_IPP(llwiCopySplit, src.ptr(), (int)src.step, dstPtrs, (int)dstStep, size, (int)src.elemSize1(), channels) >= 0; return CV_INSTRUMENT_FUN_IPP(llwiCopySplit, src.ptr(), (int)src.step, dstPtrs, (int)dstStep, size, (int)src.elemSize1(), channels, 0) >= 0;
} }
else else
{ {
@ -132,7 +126,7 @@ static bool ipp_split(const Mat& src, Mat* mv, int channels)
for( size_t i = 0; i < it.nplanes; i++, ++it ) for( size_t i = 0; i < it.nplanes; i++, ++it )
{ {
if(CV_INSTRUMENT_FUN_IPP(llwiCopySplit, ptrs[0], 0, (void**)&ptrs[1], 0, size, (int)src.elemSize1(), channels) < 0) if(CV_INSTRUMENT_FUN_IPP(llwiCopySplit, ptrs[0], 0, (void**)&ptrs[1], 0, size, (int)src.elemSize1(), channels, 0) < 0)
return false; return false;
} }
return true; return true;
@ -273,12 +267,6 @@ void cv::split(InputArray _m, OutputArrayOfArrays _mv)
} }
#ifdef HAVE_IPP #ifdef HAVE_IPP
#ifdef HAVE_IPP_IW
extern "C" {
IW_DECL(IppStatus) llwiCopyMerge(const void* const pSrc[], int srcStep, void *pDst, int dstStep,
IppiSize size, int typeSize, int channels);
}
#endif
namespace cv { namespace cv {
static bool ipp_merge(const Mat* mv, Mat& dst, int channels) static bool ipp_merge(const Mat* mv, Mat& dst, int channels)
@ -301,7 +289,7 @@ static bool ipp_merge(const Mat* mv, Mat& dst, int channels)
return false; return false;
} }
return CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, srcPtrs, (int)srcStep, dst.ptr(), (int)dst.step, size, (int)mv[0].elemSize1(), channels) >= 0; return CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, srcPtrs, (int)srcStep, dst.ptr(), (int)dst.step, size, (int)mv[0].elemSize1(), channels, 0) >= 0;
} }
else else
{ {
@ -319,7 +307,7 @@ static bool ipp_merge(const Mat* mv, Mat& dst, int channels)
for( size_t i = 0; i < it.nplanes; i++, ++it ) for( size_t i = 0; i < it.nplanes; i++, ++it )
{ {
if(CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, (const void**)&ptrs[1], 0, ptrs[0], 0, size, (int)mv[0].elemSize1(), channels) < 0) if(CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, (const void**)&ptrs[1], 0, ptrs[0], 0, size, (int)mv[0].elemSize1(), channels, 0) < 0)
return false; return false;
} }
return true; return true;
@ -820,16 +808,10 @@ void cv::mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst,
} }
#ifdef HAVE_IPP #ifdef HAVE_IPP
#ifdef HAVE_IPP_IW
extern "C" {
IW_DECL(IppStatus) llwiCopyMixed(const void *pSrc, int srcStep, int srcChannels, void *pDst, int dstStep, int dstChannels,
IppiSize size, int typeSize, int channelsShift);
}
#endif
namespace cv namespace cv
{ {
static bool ipp_extractInsertChannel(const Mat &src, Mat &dst, int channel) static bool ipp_extractChannel(const Mat &src, Mat &dst, int channel)
{ {
#ifdef HAVE_IPP_IW #ifdef HAVE_IPP_IW
CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_IPP()
@ -840,14 +822,49 @@ static bool ipp_extractInsertChannel(const Mat &src, Mat &dst, int channel)
if(src.dims != dst.dims) if(src.dims != dst.dims)
return false; return false;
if(srcChannels == dstChannels || (srcChannels != 1 && dstChannels != 1)) if(src.dims <= 2)
{
IppiSize size = ippiSize(src.size());
return CV_INSTRUMENT_FUN_IPP(llwiCopyChannel, src.ptr(), (int)src.step, srcChannels, channel, dst.ptr(), (int)dst.step, dstChannels, 0, size, (int)src.elemSize1()) >= 0;
}
else
{
const Mat *arrays[] = {&dst, NULL};
uchar *ptrs[2] = {NULL};
NAryMatIterator it(arrays, ptrs);
IppiSize size = {(int)it.size, 1};
for( size_t i = 0; i < it.nplanes; i++, ++it )
{
if(CV_INSTRUMENT_FUN_IPP(llwiCopyChannel, ptrs[0], 0, srcChannels, channel, ptrs[1], 0, dstChannels, 0, size, (int)src.elemSize1()) < 0)
return false;
}
return true;
}
#else
CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(channel);
return false;
#endif
}
static bool ipp_insertChannel(const Mat &src, Mat &dst, int channel)
{
#ifdef HAVE_IPP_IW
CV_INSTRUMENT_REGION_IPP()
int srcChannels = src.channels();
int dstChannels = dst.channels();
if(src.dims != dst.dims)
return false; return false;
if(src.dims <= 2) if(src.dims <= 2)
{ {
IppiSize size = ippiSize(src.size()); IppiSize size = ippiSize(src.size());
return CV_INSTRUMENT_FUN_IPP(llwiCopyMixed, src.ptr(), (int)src.step, srcChannels, dst.ptr(), (int)dst.step, dstChannels, size, (int)src.elemSize1(), channel) >= 0; return CV_INSTRUMENT_FUN_IPP(llwiCopyChannel, src.ptr(), (int)src.step, srcChannels, 0, dst.ptr(), (int)dst.step, dstChannels, channel, size, (int)src.elemSize1()) >= 0;
} }
else else
{ {
@ -859,7 +876,7 @@ static bool ipp_extractInsertChannel(const Mat &src, Mat &dst, int channel)
for( size_t i = 0; i < it.nplanes; i++, ++it ) for( size_t i = 0; i < it.nplanes; i++, ++it )
{ {
if(CV_INSTRUMENT_FUN_IPP(llwiCopyMixed, ptrs[0], 0, srcChannels, ptrs[1], 0, dstChannels, size, (int)src.elemSize1(), channel) < 0) if(CV_INSTRUMENT_FUN_IPP(llwiCopyChannel, ptrs[0], 0, srcChannels, 0, ptrs[1], 0, dstChannels, channel, size, (int)src.elemSize1()) < 0)
return false; return false;
} }
return true; return true;
@ -893,7 +910,7 @@ void cv::extractChannel(InputArray _src, OutputArray _dst, int coi)
_dst.create(src.dims, &src.size[0], depth); _dst.create(src.dims, &src.size[0], depth);
Mat dst = _dst.getMat(); Mat dst = _dst.getMat();
CV_IPP_RUN_FAST(ipp_extractInsertChannel(src, dst, coi)) CV_IPP_RUN_FAST(ipp_extractChannel(src, dst, coi))
mixChannels(&src, 1, &dst, 1, ch, 1); mixChannels(&src, 1, &dst, 1, ch, 1);
} }
@ -917,7 +934,7 @@ void cv::insertChannel(InputArray _src, InputOutputArray _dst, int coi)
Mat src = _src.getMat(), dst = _dst.getMat(); Mat src = _src.getMat(), dst = _dst.getMat();
CV_IPP_RUN_FAST(ipp_extractInsertChannel(src, dst, coi)) CV_IPP_RUN_FAST(ipp_insertChannel(src, dst, coi))
mixChannels(&src, 1, &dst, 1, ch, 1); mixChannels(&src, 1, &dst, 1, ch, 1);
} }
@ -5152,7 +5169,7 @@ static bool ipp_convertTo(Mat &src, Mat &dst, double alpha, double beta)
iwSrc.Init(ippiSize(sz), srcDepth, 1, NULL, (void*)src.ptr(), src.step); iwSrc.Init(ippiSize(sz), srcDepth, 1, NULL, (void*)src.ptr(), src.step);
iwDst.Init(ippiSize(sz), dstDepth, 1, NULL, (void*)dst.ptr(), dst.step); iwDst.Init(ippiSize(sz), dstDepth, 1, NULL, (void*)dst.ptr(), dst.step);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwDst, alpha, beta, mode); CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwDst, alpha, beta, ::ipp::IwiScaleParams(mode));
} }
else else
{ {
@ -5168,7 +5185,7 @@ static bool ipp_convertTo(Mat &src, Mat &dst, double alpha, double beta)
iwSrc.m_ptr = ptrs[0]; iwSrc.m_ptr = ptrs[0];
iwDst.m_ptr = ptrs[1]; iwDst.m_ptr = ptrs[1];
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwDst, alpha, beta, mode); CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwDst, alpha, beta, ::ipp::IwiScaleParams(mode));
} }
} }
} }

@ -49,18 +49,6 @@
#include "precomp.hpp" #include "precomp.hpp"
#include "opencl_kernels_core.hpp" #include "opencl_kernels_core.hpp"
#ifdef HAVE_IPP_IW
extern "C" {
IW_DECL(IppStatus) llwiCopyMask(const void *pSrc, int srcStep, void *pDst, int dstStep,
IppiSize size, int typeSize, int channels, const Ipp8u *pMask, int maskStep);
IW_DECL(IppStatus) llwiSet(const double *pValue, void *pDst, int dstStep,
IppiSize size, IppDataType dataType, int channels);
IW_DECL(IppStatus) llwiSetMask(const double *pValue, void *pDst, int dstStep,
IppiSize size, IppDataType dataType, int channels, const Ipp8u *pMask, int maskStep);
IW_DECL(IppStatus) llwiCopyMakeBorder(const void *pSrc, IppSizeL srcStep, void *pDst, IppSizeL dstStep,
IppiSizeL size, IppDataType dataType, int channels, IppiBorderSize *pBorderSize, IppiBorderType border, const Ipp64f *pBorderVal);
}
#endif
namespace cv namespace cv
{ {
@ -480,9 +468,9 @@ static bool ipp_Mat_setTo_Mat(Mat &dst, Mat &_val, Mat &mask)
if(dst.dims <= 2) if(dst.dims <= 2)
{ {
IppiSize size = ippiSize(dst.size()); IppiSize size = ippiSize(dst.size());
IppDataType dataType = ippiGetDataType(dst.depth()); IppDataType dataType = ippiGetDataType(dst.depth());
::ipp::IwValue s; ::ipp::IwValueFloat s;
convertAndUnrollScalar(_val, CV_MAKETYPE(CV_64F, dst.channels()), (uchar*)((Ipp64f*)s), 1); convertAndUnrollScalar(_val, CV_MAKETYPE(CV_64F, dst.channels()), (uchar*)((Ipp64f*)s), 1);
return CV_INSTRUMENT_FUN_IPP(llwiSetMask, s, dst.ptr(), (int)dst.step, size, dataType, dst.channels(), mask.ptr(), (int)mask.step) >= 0; return CV_INSTRUMENT_FUN_IPP(llwiSetMask, s, dst.ptr(), (int)dst.step, size, dataType, dst.channels(), mask.ptr(), (int)mask.step) >= 0;
@ -493,9 +481,9 @@ static bool ipp_Mat_setTo_Mat(Mat &dst, Mat &_val, Mat &mask)
uchar *ptrs[2] = {NULL}; uchar *ptrs[2] = {NULL};
NAryMatIterator it(arrays, ptrs); NAryMatIterator it(arrays, ptrs);
IppiSize size = {(int)it.size, 1}; IppiSize size = {(int)it.size, 1};
IppDataType dataType = ippiGetDataType(dst.depth()); IppDataType dataType = ippiGetDataType(dst.depth());
::ipp::IwValue s; ::ipp::IwValueFloat s;
convertAndUnrollScalar(_val, CV_MAKETYPE(CV_64F, dst.channels()), (uchar*)((Ipp64f*)s), 1); convertAndUnrollScalar(_val, CV_MAKETYPE(CV_64F, dst.channels()), (uchar*)((Ipp64f*)s), 1);
for( size_t i = 0; i < it.nplanes; i++, ++it) for( size_t i = 0; i < it.nplanes; i++, ++it)
@ -717,7 +705,7 @@ static bool ipp_flip(Mat &src, Mat &dst, int flip_mode)
::ipp::IwiImage iwSrc = ippiGetImage(src); ::ipp::IwiImage iwSrc = ippiGetImage(src);
::ipp::IwiImage iwDst = ippiGetImage(dst); ::ipp::IwiImage iwDst = ippiGetImage(dst);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, &iwSrc, &iwDst, ippMode); CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, iwSrc, iwDst, ippMode);
} }
catch(::ipp::IwException) catch(::ipp::IwException)
{ {
@ -1155,13 +1143,13 @@ static bool ipp_copyMakeBorder( Mat &_src, Mat &_dst, int top, int bottom,
if(_src.dims > 2) if(_src.dims > 2)
return false; return false;
Rect dstRect(borderSize.borderLeft, borderSize.borderTop, Rect dstRect(borderSize.left, borderSize.top,
_dst.cols - borderSize.borderRight - borderSize.borderLeft, _dst.cols - borderSize.right - borderSize.left,
_dst.rows - borderSize.borderBottom - borderSize.borderTop); _dst.rows - borderSize.bottom - borderSize.top);
Mat subDst = Mat(_dst, dstRect); Mat subDst = Mat(_dst, dstRect);
Mat *pSrc = &_src; Mat *pSrc = &_src;
return CV_INSTRUMENT_FUN_IPP(llwiCopyMakeBorder, pSrc->ptr(), pSrc->step, subDst.ptr(), subDst.step, size, dataType, _src.channels(), &borderSize, borderType, &value[0]) >= 0; return CV_INSTRUMENT_FUN_IPP(llwiCopyMakeBorder, pSrc->ptr(), pSrc->step, subDst.ptr(), subDst.step, size, dataType, _src.channels(), borderSize, borderType, &value[0]) >= 0;
#else #else
CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(top); CV_UNUSED(bottom); CV_UNUSED(left); CV_UNUSED(right); CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(top); CV_UNUSED(bottom); CV_UNUSED(left); CV_UNUSED(right);
CV_UNUSED(_borderType); CV_UNUSED(value); CV_UNUSED(_borderType); CV_UNUSED(value);

@ -44,7 +44,8 @@ void magnitude32f(const float* x, const float* y, float* mag, int len)
CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION()
CALL_HAL(magnitude32f, cv_hal_magnitude32f, x, y, mag, len); CALL_HAL(magnitude32f, cv_hal_magnitude32f, x, y, mag, len);
CV_IPP_RUN(!IPP_DISABLE_PERF_MAG_SSE42 || (ipp::getIppFeatures()&ippCPUID_AVX), CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0); // SSE42 performance issues
CV_IPP_RUN(IPP_VERSION_X100 > 201800 || cv::ipp::getIppTopFeatures() != ippCPUID_SSE42, CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0);
CV_CPU_DISPATCH(magnitude32f, (x, y, mag, len), CV_CPU_DISPATCH(magnitude32f, (x, y, mag, len),
CV_CPU_DISPATCH_MODES_ALL); CV_CPU_DISPATCH_MODES_ALL);
@ -55,7 +56,8 @@ void magnitude64f(const double* x, const double* y, double* mag, int len)
CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION()
CALL_HAL(magnitude64f, cv_hal_magnitude64f, x, y, mag, len); CALL_HAL(magnitude64f, cv_hal_magnitude64f, x, y, mag, len);
CV_IPP_RUN(!IPP_DISABLE_PERF_MAG_SSE42 || (ipp::getIppFeatures()&ippCPUID_AVX), CV_INSTRUMENT_FUN_IPP(ippsMagnitude_64f, x, y, mag, len) >= 0); // SSE42 performance issues
CV_IPP_RUN(IPP_VERSION_X100 > 201800 || cv::ipp::getIppTopFeatures() != ippCPUID_SSE42, CV_INSTRUMENT_FUN_IPP(ippsMagnitude_64f, x, y, mag, len) >= 0);
CV_CPU_DISPATCH(magnitude64f, (x, y, mag, len), CV_CPU_DISPATCH(magnitude64f, (x, y, mag, len),
CV_CPU_DISPATCH_MODES_ALL); CV_CPU_DISPATCH_MODES_ALL);
@ -91,7 +93,6 @@ void sqrt32f(const float* src, float* dst, int len)
CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION()
CALL_HAL(sqrt32f, cv_hal_sqrt32f, src, dst, len); CALL_HAL(sqrt32f, cv_hal_sqrt32f, src, dst, len);
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_32f_A21, src, dst, len) >= 0);
CV_CPU_DISPATCH(sqrt32f, (src, dst, len), CV_CPU_DISPATCH(sqrt32f, (src, dst, len),
CV_CPU_DISPATCH_MODES_ALL); CV_CPU_DISPATCH_MODES_ALL);
@ -103,7 +104,6 @@ void sqrt64f(const double* src, double* dst, int len)
CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION()
CALL_HAL(sqrt64f, cv_hal_sqrt64f, src, dst, len); CALL_HAL(sqrt64f, cv_hal_sqrt64f, src, dst, len);
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_64f_A50, src, dst, len) >= 0);
CV_CPU_DISPATCH(sqrt64f, (src, dst, len), CV_CPU_DISPATCH(sqrt64f, (src, dst, len),
CV_CPU_DISPATCH_MODES_ALL); CV_CPU_DISPATCH_MODES_ALL);

@ -3101,7 +3101,7 @@ static double dotProd_8u(const uchar* src1, const uchar* src2, int len)
{ {
double r = 0; double r = 0;
#if ARITHM_USE_IPP #if ARITHM_USE_IPP
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippiDotProd_8u64f_C1R, src1, len*sizeof(uchar), src2, len*sizeof(uchar), ippiSize(len, 1), &r) >= 0, r); CV_IPP_RUN(IPP_VERSION_X100 > 201800 || cv::ipp::getIppTopFeatures() != ippCPUID_SSE42, CV_INSTRUMENT_FUN_IPP(ippiDotProd_8u64f_C1R, src1, len*sizeof(uchar), src2, len*sizeof(uchar), ippiSize(len, 1), &r) >= 0, r);
#endif #endif
int i = 0; int i = 0;

@ -4356,7 +4356,6 @@ template<typename T> static void sortIdx_( const Mat& src, Mat& dst, int flags )
} }
#ifdef HAVE_IPP #ifdef HAVE_IPP
#if !IPP_DISABLE_SORT_IDX
typedef IppStatus (CV_STDCALL *IppSortIndexFunc)(const void* pSrc, Ipp32s srcStrideBytes, Ipp32s *pDstIndx, int len, Ipp8u *pBuffer); typedef IppStatus (CV_STDCALL *IppSortIndexFunc)(const void* pSrc, Ipp32s srcStrideBytes, Ipp32s *pDstIndx, int len, Ipp8u *pBuffer);
static IppSortIndexFunc getSortIndexFunc(int depth, bool sortDescending) static IppSortIndexFunc getSortIndexFunc(int depth, bool sortDescending)
@ -4435,7 +4434,6 @@ static bool ipp_sortIdx( const Mat& src, Mat& dst, int flags )
return true; return true;
} }
#endif #endif
#endif
typedef void (*SortFunc)(const Mat& src, Mat& dst, int flags); typedef void (*SortFunc)(const Mat& src, Mat& dst, int flags);
} }
@ -4472,9 +4470,8 @@ void cv::sortIdx( InputArray _src, OutputArray _dst, int flags )
_dst.release(); _dst.release();
_dst.create( src.size(), CV_32S ); _dst.create( src.size(), CV_32S );
dst = _dst.getMat(); dst = _dst.getMat();
#if !IPP_DISABLE_SORT_IDX
CV_IPP_RUN_FAST(ipp_sortIdx(src, dst, flags)); CV_IPP_RUN_FAST(ipp_sortIdx(src, dst, flags));
#endif
static SortFunc tab[] = static SortFunc tab[] =
{ {

@ -257,7 +257,8 @@ struct CoreTLSData
//#ifdef HAVE_OPENCL //#ifdef HAVE_OPENCL
device(0), useOpenCL(-1), device(0), useOpenCL(-1),
//#endif //#endif
useIPP(-1) useIPP(-1),
useIPP_NE(-1)
#ifdef HAVE_TEGRA_OPTIMIZATION #ifdef HAVE_TEGRA_OPTIMIZATION
,useTegra(-1) ,useTegra(-1)
#endif #endif
@ -272,7 +273,8 @@ struct CoreTLSData
ocl::Queue oclQueue; // the queue used for running a kernel, see also getQueue, Kernel::run ocl::Queue oclQueue; // the queue used for running a kernel, see also getQueue, Kernel::run
int useOpenCL; // 1 - use, 0 - do not use, -1 - auto/not initialized int useOpenCL; // 1 - use, 0 - do not use, -1 - auto/not initialized
//#endif //#endif
int useIPP; // 1 - use, 0 - do not use, -1 - auto/not initialized int useIPP; // 1 - use, 0 - do not use, -1 - auto/not initialized
int useIPP_NE; // 1 - use, 0 - do not use, -1 - auto/not initialized
#ifdef HAVE_TEGRA_OPTIMIZATION #ifdef HAVE_TEGRA_OPTIMIZATION
int useTegra; // 1 - use, 0 - do not use, -1 - auto/not initialized int useTegra; // 1 - use, 0 - do not use, -1 - auto/not initialized
#endif #endif

@ -1294,6 +1294,12 @@ static bool ipp_countNonZero( Mat &src, int &res )
{ {
CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_IPP()
#if IPP_VERSION_X100 < 201801
// Poor performance of SSE42
if(cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
return false;
#endif
Ipp32s count = 0; Ipp32s count = 0;
int depth = src.depth(); int depth = src.depth();
@ -2531,15 +2537,16 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI
#if IPP_VERSION_X100 >= 700 #if IPP_VERSION_X100 >= 700
CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_IPP()
#if IPP_DISABLE_MINMAX_NAN_SSE42 #if IPP_VERSION_X100 < 201800
// cv::minMaxIdx problem with NaN input
// Disable 32F processing only // Disable 32F processing only
if(src.depth() == CV_32F && !(ipp::getIppFeatures()&ippCPUID_AVX)) if(src.depth() == CV_32F && cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
return false; return false;
#endif #endif
#if IPP_VERSION_X100 < 201801
// cv::minMaxIdx problem with index positions on AVX // cv::minMaxIdx problem with index positions on AVX
#if IPP_VERSION_X100 < 201810 if(!mask.empty() && _maxIdx && cv::ipp::getIppTopFeatures() != ippCPUID_SSE42)
if(!mask.empty() && _maxIdx && ipp::getIppFeatures()&ippCPUID_AVX)
return false; return false;
#endif #endif
@ -2550,8 +2557,8 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI
IppiPoint minIdx = {-1, -1}; IppiPoint minIdx = {-1, -1};
IppiPoint maxIdx = {-1, -1}; IppiPoint maxIdx = {-1, -1};
float *pMinVal = (_minVal)?&minVal:NULL; float *pMinVal = (_minVal || _minIdx)?&minVal:NULL;
float *pMaxVal = (_maxVal)?&maxVal:NULL; float *pMaxVal = (_maxVal || _maxIdx)?&maxVal:NULL;
IppiPoint *pMinIdx = (_minIdx)?&minIdx:NULL; IppiPoint *pMinIdx = (_minIdx)?&minIdx:NULL;
IppiPoint *pMaxIdx = (_maxIdx)?&maxIdx:NULL; IppiPoint *pMaxIdx = (_maxIdx)?&maxIdx:NULL;
@ -2564,6 +2571,8 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI
ippMinMaxFun = ipp_minIdx_wrap; ippMinMaxFun = ipp_minIdx_wrap;
else if(_maxVal && !_maxIdx && _minVal && !_minIdx) else if(_maxVal && !_maxIdx && _minVal && !_minIdx)
ippMinMaxFun = ipp_minMax_wrap; ippMinMaxFun = ipp_minMax_wrap;
else if(!_maxVal && !_maxIdx && !_minVal && !_minIdx)
return false;
else else
ippMinMaxFun = ipp_minMaxIndex_wrap; ippMinMaxFun = ipp_minMaxIndex_wrap;
} }
@ -2582,8 +2591,12 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI
*_maxVal = maxVal; *_maxVal = maxVal;
if(_minIdx) if(_minIdx)
{ {
#if IPP_VERSION_X100 < 201801
// Should be just ippStsNoOperation check, but there is a bug in the function so we need additional checks // Should be just ippStsNoOperation check, but there is a bug in the function so we need additional checks
if(status == ippStsNoOperation && !mask.empty() && !pMinIdx->x && !pMinIdx->y) if(status == ippStsNoOperation && !mask.empty() && !pMinIdx->x && !pMinIdx->y)
#else
if(status == ippStsNoOperation)
#endif
{ {
_minIdx[0] = -1; _minIdx[0] = -1;
_minIdx[1] = -1; _minIdx[1] = -1;
@ -2596,8 +2609,12 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI
} }
if(_maxIdx) if(_maxIdx)
{ {
#if IPP_VERSION_X100 < 201801
// Should be just ippStsNoOperation check, but there is a bug in the function so we need additional checks // Should be just ippStsNoOperation check, but there is a bug in the function so we need additional checks
if(status == ippStsNoOperation && !mask.empty() && !pMaxIdx->x && !pMaxIdx->y) if(status == ippStsNoOperation && !mask.empty() && !pMaxIdx->x && !pMaxIdx->y)
#else
if(status == ippStsNoOperation)
#endif
{ {
_maxIdx[0] = -1; _maxIdx[0] = -1;
_maxIdx[1] = -1; _maxIdx[1] = -1;

@ -1909,55 +1909,146 @@ struct IPPInitSingleton
public: public:
IPPInitSingleton() IPPInitSingleton()
{ {
useIPP = true; useIPP = true;
ippStatus = 0; useIPP_NE = false;
funcname = NULL; ippStatus = 0;
filename = NULL; funcname = NULL;
linen = 0; filename = NULL;
ippFeatures = 0; linen = 0;
cpuFeatures = 0;
ippFeatures = 0;
ippTopFeatures = 0;
pIppLibInfo = NULL;
ippStatus = ippGetCpuFeatures(&cpuFeatures, NULL);
if(ippStatus < 0)
{
std::cerr << "ERROR: IPP cannot detect CPU features, IPP was disabled " << std::endl;
useIPP = false;
return;
}
ippFeatures = cpuFeatures;
bool unsupported = false;
const char* pIppEnv = getenv("OPENCV_IPP"); const char* pIppEnv = getenv("OPENCV_IPP");
cv::String env = pIppEnv; cv::String env = pIppEnv;
if(env.size()) if(env.size())
{ {
env = env.toLowerCase();
if(env.substr(0, 2) == "ne")
{
useIPP_NE = true;
env = env.substr(3, env.size());
}
if(env == "disabled") if(env == "disabled")
{ {
std::cerr << "WARNING: IPP was disabled by OPENCV_IPP environment variable" << std::endl; std::cerr << "WARNING: IPP was disabled by OPENCV_IPP environment variable" << std::endl;
useIPP = false; useIPP = false;
} }
#if IPP_VERSION_X100 >= 900
else if(env == "sse")
ippFeatures = ippCPUID_SSE;
else if(env == "sse2")
ippFeatures = ippCPUID_SSE2;
else if(env == "sse3")
ippFeatures = ippCPUID_SSE3;
else if(env == "ssse3")
ippFeatures = ippCPUID_SSSE3;
else if(env == "sse41")
ippFeatures = ippCPUID_SSE41;
else if(env == "sse42") else if(env == "sse42")
ippFeatures = ippCPUID_SSE42; {
else if(env == "avx") if(!(cpuFeatures&ippCPUID_SSE42))
ippFeatures = ippCPUID_AVX; unsupported = true;
ippFeatures = ippCPUID_MMX|ippCPUID_SSE|ippCPUID_SSE2|ippCPUID_SSE3|ippCPUID_SSSE3|ippCPUID_SSE41|ippCPUID_SSE42;
ippFeatures |= (cpuFeatures&ippCPUID_AES);
ippFeatures |= (cpuFeatures&ippCPUID_CLMUL);
ippFeatures |= (cpuFeatures&ippCPUID_SHA);
}
else if(env == "avx2") else if(env == "avx2")
ippFeatures = ippCPUID_AVX2; {
if(!(cpuFeatures&ippCPUID_AVX2))
unsupported = true;
ippFeatures = ippCPUID_MMX|ippCPUID_SSE|ippCPUID_SSE2|ippCPUID_SSE3|ippCPUID_SSSE3|ippCPUID_SSE41|ippCPUID_SSE42|ippCPUID_AVX|ippCPUID_AVX2;
ippFeatures |= (cpuFeatures&ippCPUID_AES);
ippFeatures |= (cpuFeatures&ippCPUID_CLMUL);
ippFeatures |= (cpuFeatures&ippCPUID_F16C);
ippFeatures |= (cpuFeatures&ippCPUID_ADCOX);
ippFeatures |= (cpuFeatures&ippCPUID_RDSEED);
ippFeatures |= (cpuFeatures&ippCPUID_PREFETCHW);
ippFeatures |= (cpuFeatures&ippCPUID_MPX);
}
#if defined (_M_AMD64) || defined (__x86_64__)
else if(env == "avx512")
{
if(!(cpuFeatures&ippCPUID_AVX512F))
unsupported = true;
ippFeatures = ippCPUID_MMX|ippCPUID_SSE|ippCPUID_SSE2|ippCPUID_SSE3|ippCPUID_SSSE3|ippCPUID_SSE41|ippCPUID_SSE42|ippCPUID_AVX|ippCPUID_AVX2|ippCPUID_AVX512F;
ippFeatures |= (cpuFeatures&ippCPUID_AES);
ippFeatures |= (cpuFeatures&ippCPUID_CLMUL);
ippFeatures |= (cpuFeatures&ippCPUID_F16C);
ippFeatures |= (cpuFeatures&ippCPUID_ADCOX);
ippFeatures |= (cpuFeatures&ippCPUID_RDSEED);
ippFeatures |= (cpuFeatures&ippCPUID_PREFETCHW);
ippFeatures |= (cpuFeatures&ippCPUID_MPX);
ippFeatures |= (cpuFeatures&ippCPUID_AVX512CD);
ippFeatures |= (cpuFeatures&ippCPUID_AVX512VL);
ippFeatures |= (cpuFeatures&ippCPUID_AVX512BW);
ippFeatures |= (cpuFeatures&ippCPUID_AVX512DQ);
ippFeatures |= (cpuFeatures&ippCPUID_AVX512ER);
ippFeatures |= (cpuFeatures&ippCPUID_AVX512PF);
ippFeatures |= (cpuFeatures&ippCPUID_AVX512VBMI);
}
#endif #endif
else else
std::cerr << "ERROR: Improper value of OPENCV_IPP: " << env.c_str() << std::endl; std::cerr << "ERROR: Improper value of OPENCV_IPP: " << env.c_str() << ". Correct values are: disabled, sse42, avx2, avx512 (Intel64 only)" << std::endl;
}
if(unsupported)
{
std::cerr << "WARNING: selected IPP features are not supported by CPU. IPP was initialized with default features" << std::endl;
ippFeatures = cpuFeatures;
}
// Disable AVX1 since we don't track regressions for it. SSE42 will be used instead
if(cpuFeatures&ippCPUID_AVX && !(cpuFeatures&ippCPUID_AVX2))
ippFeatures &= ~ippCPUID_AVX;
// IPP integrations in OpenCV support only SSE4.2, AVX2 and AVX-512 optimizations.
if(!(
cpuFeatures&ippCPUID_AVX512F ||
cpuFeatures&ippCPUID_AVX2 ||
cpuFeatures&ippCPUID_SSE42
))
{
useIPP = false;
return;
} }
IPP_INITIALIZER(ippFeatures) IPP_INITIALIZER(ippFeatures)
ippFeatures = ippGetEnabledCpuFeatures(); ippFeatures = ippGetEnabledCpuFeatures();
// Detect top level optimizations to make comparison easier for optimizations dependent conditions
if(ippFeatures&ippCPUID_AVX512F)
{
if((ippFeatures&ippCPUID_AVX512_SKX) == ippCPUID_AVX512_SKX)
ippTopFeatures = ippCPUID_AVX512_SKX;
else if((ippFeatures&ippCPUID_AVX512_KNL) == ippCPUID_AVX512_KNL)
ippTopFeatures = ippCPUID_AVX512_KNL;
else
ippTopFeatures = ippCPUID_AVX512F; // Unknown AVX512 configuration
}
else if(ippFeatures&ippCPUID_AVX2)
ippTopFeatures = ippCPUID_AVX2;
else if(ippFeatures&ippCPUID_SSE42)
ippTopFeatures = ippCPUID_SSE42;
pIppLibInfo = ippiGetLibVersion();
} }
bool useIPP; public:
bool useIPP;
bool useIPP_NE;
int ippStatus; // 0 - all is ok, -1 - IPP functions failed int ippStatus; // 0 - all is ok, -1 - IPP functions failed
const char *funcname; const char *funcname;
const char *filename; const char *filename;
int linen; int linen;
Ipp64u ippFeatures; Ipp64u ippFeatures;
Ipp64u cpuFeatures;
Ipp64u ippTopFeatures;
const IppLibraryVersion *pIppLibInfo;
}; };
static IPPInitSingleton& getIPPSingleton() static IPPInitSingleton& getIPPSingleton()
@ -1983,6 +2074,17 @@ int getIppFeatures()
#endif #endif
} }
unsigned long long getIppTopFeatures();

// Returns the single "top level" IPP optimization identifier recorded by the
// IPP init singleton (its ippTopFeatures field), so callers can compare it
// against one ippCPUID_* value instead of testing individual feature bits.
// Builds without HAVE_IPP always report 0.
unsigned long long getIppTopFeatures()
{
#ifdef HAVE_IPP
    const unsigned long long topFeatures = getIPPSingleton().ippTopFeatures;
    return topFeatures;
#else
    return 0;
#endif
}
void setIppStatus(int status, const char * const _funcname, const char * const _filename, int _line) void setIppStatus(int status, const char * const _funcname, const char * const _filename, int _line)
{ {
#ifdef HAVE_IPP #ifdef HAVE_IPP
@ -2013,6 +2115,19 @@ String getIppErrorLocation()
#endif #endif
} }
// Describes the IPP library in use as "<Name> <Version> <BuildDate>".
// Returns "error" when the init singleton holds no library info (its
// pIppLibInfo pointer is NULL), and "disabled" when OpenCV was built
// without HAVE_IPP.
String getIppVersion()
{
#ifdef HAVE_IPP
    const IppLibraryVersion *libInfo = getIPPSingleton().pIppLibInfo;
    if(!libInfo)
        return String("error");

    return format("%s %s %s", libInfo->Name, libInfo->Version, libInfo->BuildDate);
#else
    return String("disabled");
#endif
}
bool useIPP() bool useIPP()
{ {
#ifdef HAVE_IPP #ifdef HAVE_IPP
@ -2038,6 +2153,31 @@ void setUseIPP(bool flag)
#endif #endif
} }
// Reports whether the "NE" IPP mode is enabled in the CoreTLSData record
// returned by getCoreTlsData(). The stored value is tri-state
// (1 - use, 0 - do not use, -1 - not initialized); on first use it is
// seeded from the IPP init singleton. Always false without HAVE_IPP.
bool useIPP_NE()
{
#ifdef HAVE_IPP
    CoreTLSData* tls = getCoreTlsData().get();

    // A negative value means the flag has not been initialized yet.
    if(tls->useIPP_NE < 0)
        tls->useIPP_NE = getIPPSingleton().useIPP_NE;

    return tls->useIPP_NE > 0;
#else
    return false;
#endif
}
void setUseIPP_NE(bool flag)
{
CoreTLSData* data = getCoreTlsData().get();
#ifdef HAVE_IPP
data->useIPP_NE = (getIPPSingleton().useIPP_NE)?flag:false;
#else
(void)flag;
data->useIPP_NE = false;
#endif
}
} // namespace ipp } // namespace ipp
} // namespace cv } // namespace cv

@ -95,7 +95,7 @@ static bool ipp_Canny(const Mat& src , const Mat& dx_, const Mat& dy_, Mat& dst,
ippiGetImage(dy_, iwSrcDy); ippiGetImage(dy_, iwSrcDy);
ippiGetImage(dst, iwDst); ippiGetImage(dst, iwDst);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCannyDeriv, &iwSrcDx, &iwSrcDy, &iwDst, norm, low, high); CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCannyDeriv, iwSrcDx, iwSrcDy, iwDst, low, high, ::ipp::IwiFilterCannyDerivParams(norm));
} }
catch (::ipp::IwException ex) catch (::ipp::IwException ex)
{ {
@ -121,7 +121,7 @@ static bool ipp_Canny(const Mat& src , const Mat& dx_, const Mat& dy_, Mat& dst,
ippiGetImage(src, iwSrc); ippiGetImage(src, iwSrc);
ippiGetImage(dst, iwDst); ippiGetImage(dst, iwDst);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCanny, &iwSrc, &iwDst, ippFilterSobel, kernel, norm, low, high, ippBorderRepl); CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCanny, iwSrc, iwDst, low, high, ::ipp::IwiFilterCannyParams(ippFilterSobel, kernel, norm), ippBorderRepl);
} }
catch (::ipp::IwException) catch (::ipp::IwException)
{ {

@ -469,11 +469,32 @@ static ippiGeneralFunc ippiRGB2GrayC4Tab[] =
0, (ippiGeneralFunc)ippiRGBToGray_32f_AC4C1R, 0, 0 0, (ippiGeneralFunc)ippiRGBToGray_32f_AC4C1R, 0, 0
}; };
static ippiGeneralFunc ippiCopyP3C3RTab[] =
static IppStatus ippiGrayToRGB_C1C3R(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep, IppiSize roiSize)
{ {
(ippiGeneralFunc)ippiCopy_8u_P3C3R, 0, (ippiGeneralFunc)ippiCopy_16u_P3C3R, 0, return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_8u_C1C3R, pSrc, srcStep, pDst, dstStep, roiSize);
0, (ippiGeneralFunc)ippiCopy_32f_P3C3R, 0, 0 }
}; static IppStatus ippiGrayToRGB_C1C3R(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep, IppiSize roiSize)
{
return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_16u_C1C3R, pSrc, srcStep, pDst, dstStep, roiSize);
}
// Ipp32f overload: forwards to ippiGrayToRGB_32f_C1C3R through the IPP
// instrumentation macro so that callers templated on the pixel type can
// use one common function name.
static IppStatus ippiGrayToRGB_C1C3R(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, IppiSize roiSize)
{
    const IppStatus status = CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_32f_C1C3R, pSrc, srcStep, pDst, dstStep, roiSize);
    return status;
}
// Ipp8u overload: forwards to ippiGrayToRGB_8u_C1C4R through the IPP
// instrumentation macro, passing aval on unchanged, so that callers
// templated on the pixel type can use one common function name.
static IppStatus ippiGrayToRGB_C1C4R(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep, IppiSize roiSize, Ipp8u aval)
{
    const IppStatus status = CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_8u_C1C4R, pSrc, srcStep, pDst, dstStep, roiSize, aval);
    return status;
}
// Ipp16u overload: forwards to ippiGrayToRGB_16u_C1C4R through the IPP
// instrumentation macro, passing aval on unchanged, so that callers
// templated on the pixel type can use one common function name.
static IppStatus ippiGrayToRGB_C1C4R(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep, IppiSize roiSize, Ipp16u aval)
{
    const IppStatus status = CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_16u_C1C4R, pSrc, srcStep, pDst, dstStep, roiSize, aval);
    return status;
}
// Ipp32f overload: forwards to ippiGrayToRGB_32f_C1C4R through the IPP
// instrumentation macro, passing aval on unchanged, so that callers
// templated on the pixel type can use one common function name.
static IppStatus ippiGrayToRGB_C1C4R(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, IppiSize roiSize, Ipp32f aval)
{
    const IppStatus status = CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_32f_C1C4R, pSrc, srcStep, pDst, dstStep, roiSize, aval);
    return status;
}
#if !IPP_DISABLE_RGB_XYZ #if !IPP_DISABLE_RGB_XYZ
static ippiGeneralFunc ippiRGB2XYZTab[] = static ippiGeneralFunc ippiRGB2XYZTab[] =
@ -580,48 +601,31 @@ private:
Ipp32f coeffs[3]; Ipp32f coeffs[3];
}; };
template <typename T>
struct IPPGray2BGRFunctor struct IPPGray2BGRFunctor
{ {
IPPGray2BGRFunctor(ippiGeneralFunc _func) : IPPGray2BGRFunctor(){}
ippiGrayToBGR(_func)
{
}
bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
{ {
if (ippiGrayToBGR == 0) return ippiGrayToRGB_C1C3R((T*)src, srcStep, (T*)dst, dstStep, ippiSize(cols, rows)) >= 0;
return false;
const void* srcarray[3] = { src, src, src };
return CV_INSTRUMENT_FUN_IPP(ippiGrayToBGR, srcarray, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0;
} }
private:
ippiGeneralFunc ippiGrayToBGR;
}; };
template <typename T>
struct IPPGray2BGRAFunctor struct IPPGray2BGRAFunctor
{ {
IPPGray2BGRAFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _depth) : IPPGray2BGRAFunctor()
ippiColorConvertGeneral(_func1), ippiColorConvertReorder(_func2), depth(_depth)
{ {
alpha = ColorChannel<T>::max();
} }
bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
{ {
if (ippiColorConvertGeneral == 0 || ippiColorConvertReorder == 0) return ippiGrayToRGB_C1C4R((T*)src, srcStep, (T*)dst, dstStep, ippiSize(cols, rows), alpha) >= 0;
return false;
const void* srcarray[3] = { src, src, src };
Mat temp(rows, cols, CV_MAKETYPE(depth, 3));
if(CV_INSTRUMENT_FUN_IPP(ippiColorConvertGeneral, srcarray, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows)) < 0)
return false;
int order[4] = {0, 1, 2, 3};
return CV_INSTRUMENT_FUN_IPP(ippiColorConvertReorder, temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows), order) >= 0;
} }
private:
ippiGeneralFunc ippiColorConvertGeneral; T alpha;
ippiReorderFunc ippiColorConvertReorder;
int depth;
}; };
struct IPPReorderGeneralFunctor struct IPPReorderGeneralFunctor
@ -9744,18 +9748,27 @@ void cvtGraytoBGR(const uchar * src_data, size_t src_step,
#if defined(HAVE_IPP) && IPP_VERSION_X100 >= 700 #if defined(HAVE_IPP) && IPP_VERSION_X100 >= 700
CV_IPP_CHECK() CV_IPP_CHECK()
{ {
bool ippres = false;
if(dcn == 3) if(dcn == 3)
{ {
if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, if( depth == CV_8U )
IPPGray2BGRFunctor(ippiCopyP3C3RTab[depth])) ) ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRFunctor<Ipp8u>());
return; else if( depth == CV_16U )
ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRFunctor<Ipp16u>());
else
ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRFunctor<Ipp32f>());
} }
else if(dcn == 4) else if(dcn == 4)
{ {
if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, if( depth == CV_8U )
IPPGray2BGRAFunctor(ippiCopyP3C3RTab[depth], ippiSwapChannelsC3C4RTab[depth], depth)) ) ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRAFunctor<Ipp8u>());
return; else if( depth == CV_16U )
ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRAFunctor<Ipp16u>());
else
ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRAFunctor<Ipp32f>());
} }
if(ippres)
return;
} }
#endif #endif

@ -312,7 +312,7 @@ static bool ipp_Deriv(InputArray _src, OutputArray _dst, int dx, int dy, int ksi
::ipp::IwiImage iwDstProc = iwDst; ::ipp::IwiImage iwDstProc = iwDst;
::ipp::IwiBorderSize borderSize(maskSize); ::ipp::IwiBorderSize borderSize(maskSize);
::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize)); ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
if(!ippBorder.m_borderType) if(!ippBorder)
return false; return false;
if(srcType == ipp8u && dstType == ipp8u) if(srcType == ipp8u && dstType == ipp8u)
@ -324,17 +324,17 @@ static bool ipp_Deriv(InputArray _src, OutputArray _dst, int dx, int dy, int ksi
{ {
iwSrc -= borderSize; iwSrc -= borderSize;
iwSrcProc.Alloc(iwSrc.m_size, ipp32f, channels); iwSrcProc.Alloc(iwSrc.m_size, ipp32f, channels);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwSrcProc, 1, 0, ippAlgHintFast); CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwSrcProc, 1, 0, ::ipp::IwiScaleParams(ippAlgHintFast));
iwSrcProc += borderSize; iwSrcProc += borderSize;
} }
if(useScharr) if(useScharr)
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterScharr, &iwSrcProc, &iwDstProc, derivType, maskSize, ippBorder); CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterScharr, iwSrcProc, iwDstProc, derivType, maskSize, ::ipp::IwDefault(), ippBorder);
else else
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterSobel, &iwSrcProc, &iwDstProc, derivType, maskSize, ippBorder); CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterSobel, iwSrcProc, iwDstProc, derivType, maskSize, ::ipp::IwDefault(), ippBorder);
if(useScale) if(useScale)
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwDstProc, &iwDst, scale, delta, ippAlgHintFast); CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwDstProc, iwDst, scale, delta, ::ipp::IwiScaleParams(ippAlgHintFast));
} }
catch (::ipp::IwException) catch (::ipp::IwException)
{ {
@ -732,7 +732,7 @@ static bool ipp_Laplacian(InputArray _src, OutputArray _dst, int ksize, double s
::ipp::IwiImage iwDstProc = iwDst; ::ipp::IwiImage iwDstProc = iwDst;
::ipp::IwiBorderSize borderSize(maskSize); ::ipp::IwiBorderSize borderSize(maskSize);
::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize)); ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
if(!ippBorder.m_borderType) if(!ippBorder)
return false; return false;
if(srcType == ipp8u && dstType == ipp8u) if(srcType == ipp8u && dstType == ipp8u)
@ -744,14 +744,14 @@ static bool ipp_Laplacian(InputArray _src, OutputArray _dst, int ksize, double s
{ {
iwSrc -= borderSize; iwSrc -= borderSize;
iwSrcProc.Alloc(iwSrc.m_size, ipp32f, channels); iwSrcProc.Alloc(iwSrc.m_size, ipp32f, channels);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwSrcProc, 1, 0); CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwSrcProc, 1, 0);
iwSrcProc += borderSize; iwSrcProc += borderSize;
} }
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterLaplacian, &iwSrcProc, &iwDstProc, maskSize, ippBorder); CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterLaplacian, iwSrcProc, iwDstProc, maskSize, ::ipp::IwDefault(), ippBorder);
if(useScale) if(useScale)
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwDstProc, &iwDst, scale, delta); CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwDstProc, iwDst, scale, delta);
} }
catch (::ipp::IwException ex) catch (::ipp::IwException ex)

@ -4560,170 +4560,88 @@ static bool replacementFilter2D(int stype, int dtype, int kernel_type,
} }
#ifdef HAVE_IPP #ifdef HAVE_IPP
typedef IppStatus(CV_STDCALL* IppiFilterBorder)( static bool ippFilter2D(int stype, int dtype, int kernel_type,
const void* pSrc, int srcStep, void* pDst, int dstStep, uchar * src_data, size_t src_step,
IppiSize dstRoiSize, IppiBorderType border, const void* borderValue, uchar * dst_data, size_t dst_step,
const IppiFilterBorderSpec* pSpec, Ipp8u* pBuffer); int width, int height,
int full_width, int full_height,
static IppiFilterBorder getIppFunc(int stype) int offset_x, int offset_y,
uchar * kernel_data, size_t kernel_step,
int kernel_width, int kernel_height,
int anchor_x, int anchor_y,
double delta, int borderType,
bool isSubmatrix)
{ {
switch (stype) #ifdef HAVE_IPP_IW
{ CV_INSTRUMENT_REGION_IPP();
case CV_8UC1:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_8u_C1R);
case CV_8UC3:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_8u_C3R);
case CV_8UC4:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_8u_C4R);
case CV_16UC1:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16u_C1R);
case CV_16UC3:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16u_C3R);
case CV_16UC4:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16u_C4R);
case CV_16SC1:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16s_C1R);
case CV_16SC3:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16s_C3R);
case CV_16SC4:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16s_C4R);
case CV_32FC1:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_32f_C1R);
case CV_32FC3:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_32f_C3R);
case CV_32FC4:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_32f_C4R);
default:
return 0;
}
}
template <int kdepth> ::ipp::IwiSize iwSize(width, height);
struct IppFilterTrait { }; ::ipp::IwiSize kernelSize(kernel_width, kernel_height);
IppDataType type = ippiGetDataType(CV_MAT_DEPTH(stype));
int channels = CV_MAT_CN(stype);
template <> CV_UNUSED(isSubmatrix);
struct IppFilterTrait<CV_16S>
{
enum { kernel_type_id = CV_16SC1 };
typedef Ipp16s kernel_type;
typedef IppStatus(CV_STDCALL* copy_fun_type)(const kernel_type* pSrc, int srcStep, kernel_type* pDst, int dstStep, IppiSize roiSize);
inline static copy_fun_type get_copy_fun() { return ippiCopy_16s_C1R; }
inline static IppStatus runInit(const kernel_type* pKernel, IppiSize kernelSize, int divisor, IppDataType dataType, int numChannels, IppRoundMode roundMode, IppiFilterBorderSpec* pSpec)
{
return ippiFilterBorderInit_16s(pKernel, kernelSize, divisor, dataType, numChannels, roundMode, pSpec);
}
};
template <> #if IPP_VERSION_X100 >= 201700 && IPP_VERSION_X100 <= 201702 // IPP bug with 1x1 kernel
struct IppFilterTrait<CV_32F> if(kernel_width == 1 && kernel_height == 1)
{ return false;
enum { kernel_type_id = CV_32FC1 }; #endif
typedef Ipp32f kernel_type;
typedef IppStatus(CV_STDCALL* copy_fun_type)(const kernel_type* pSrc, int srcStep, kernel_type* pDst, int dstStep, IppiSize roiSize);
inline static copy_fun_type get_copy_fun() { return ippiCopy_32f_C1R; }
inline static IppStatus runInit(const kernel_type* pKernel, IppiSize kernelSize, int divisor, IppDataType dataType, int numChannels, IppRoundMode roundMode, IppiFilterBorderSpec* pSpec)
{
CV_UNUSED(divisor);
return ippiFilterBorderInit_32f(pKernel, kernelSize, dataType, numChannels, roundMode, pSpec);
}
};
template <int kdepth> #if IPP_VERSION_X100 < 201801
static bool ippFilter2D(int stype, int dtype, // Too big difference compared to OpenCV FFT-based convolution
uchar * src_data, size_t src_step, if(kernel_type == CV_32FC1 && (type == ipp16s || type == ipp16u) && (kernel_width > 7 || kernel_height > 7))
uchar * dst_data, size_t dst_step, return false;
int width, int height,
uchar * kernel_data, size_t kernel_step,
int kernel_width, int kernel_height,
int anchor_x, int anchor_y,
double delta, int borderType, bool isSubmatrix)
{
CV_INSTRUMENT_REGION_IPP();
typedef IppFilterTrait<kdepth> trait; // Poor optimization for big kernels
typedef typename trait::kernel_type kernel_type; if(kernel_width > 7 || kernel_height > 7)
return false;
#endif
IppAutoBuffer<IppiFilterBorderSpec> spec; if(src_data == dst_data)
IppAutoBuffer<Ipp8u> buffer; return false;
IppAutoBuffer<kernel_type> kernelBuffer;
IppiBorderType ippBorderType;
int src_type;
Point anchor(anchor_x, anchor_y); if(stype != dtype)
#if IPP_VERSION_X100 >= 900 return false;
Point ippAnchor((kernel_width - 1) / 2, (kernel_height - 1) / 2);
#else
Point ippAnchor(kernel_width >> 1, kernel_height >> 1);
#endif
bool isIsolated = (borderType & BORDER_ISOLATED) != 0;
int borderTypeNI = borderType & ~BORDER_ISOLATED;
ippBorderType = ippiGetBorderType(borderTypeNI);
int ddepth = CV_MAT_DEPTH(dtype);
int sdepth = CV_MAT_DEPTH(stype);
#if IPP_VERSION_X100 >= 201700 && IPP_VERSION_X100 <= 201702 // IPP bug with 1x1 kernel if(kernel_type != CV_16SC1 && kernel_type != CV_32FC1)
if(kernel_width == 1 && kernel_height == 1)
return false; return false;
#endif
bool runIpp = true // TODO: Implement offset for 8u, 16u
&& (borderTypeNI == BORDER_CONSTANT || borderTypeNI == BORDER_REPLICATE) if(std::fabs(delta) >= DBL_EPSILON)
&& (sdepth == ddepth)
&& (getIppFunc(stype))
&& ((int)ippBorderType > 0)
&& (!isSubmatrix || isIsolated)
&& (std::fabs(delta - 0) < DBL_EPSILON)
&& (ippAnchor == anchor)
&& src_data != dst_data;
if (!runIpp)
return false; return false;
src_type = stype; if(!ippiCheckAnchor(anchor_x, anchor_y, kernel_width, kernel_height))
int cn = CV_MAT_CN(stype);
IppiSize kernelSize = { kernel_width, kernel_height };
IppDataType dataType = ippiGetDataType(ddepth);
IppDataType kernelType = ippiGetDataType(kdepth);
Ipp32s specSize = 0;
Ipp32s bufsize = 0;
IppiSize dstRoiSize = { width, height };
IppStatus status;
status = ippiFilterBorderGetSize(kernelSize, dstRoiSize, dataType, kernelType, cn, &specSize, &bufsize);
if (status < 0)
return false; return false;
kernel_type* pKerBuffer = (kernel_type*)kernel_data; try
size_t good_kernel_step = sizeof(kernel_type) * static_cast<size_t>(kernelSize.width); {
#if IPP_VERSION_X100 >= 900 ::ipp::IwiBorderSize iwBorderSize;
if (kernel_step != good_kernel_step) { ::ipp::IwiBorderType iwBorderType;
kernelBuffer.allocate((int)good_kernel_step * kernelSize.height); ::ipp::IwiImage iwKernel(ippiSize(kernel_width, kernel_height), ippiGetDataType(CV_MAT_DEPTH(kernel_type)), CV_MAT_CN(kernel_type), 0, (void*)kernel_data, kernel_step);
status = trait::get_copy_fun()((kernel_type*)kernel_data, (int)kernel_step, kernelBuffer, (int)good_kernel_step, kernelSize); ::ipp::IwiImage iwSrc(iwSize, type, channels, ::ipp::IwiBorderSize(offset_x, offset_y, full_width-offset_x-width, full_height-offset_y-height), (void*)src_data, src_step);
if (status < 0) ::ipp::IwiImage iwDst(iwSize, type, channels, ::ipp::IwiBorderSize(offset_x, offset_y, full_width-offset_x-width, full_height-offset_y-height), (void*)dst_data, dst_step);
iwBorderSize = ::ipp::iwiSizeToBorderSize(kernelSize);
iwBorderType = ippiGetBorder(iwSrc, borderType, iwBorderSize);
if(!iwBorderType)
return false; return false;
pKerBuffer = kernelBuffer;
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilter, iwSrc, iwDst, iwKernel, ::ipp::IwiFilterParams(1, 0, ippAlgHintNone, ippRndFinancial), iwBorderType);
} }
#else catch(::ipp::IwException ex)
kernelBuffer.Alloc(good_kernel_step * kernelSize.height); {
Mat kerFlip(Size(kernelSize.width, kernelSize.height), trait::kernel_type_id, kernelBuffer, (int)good_kernel_step);
Mat kernel(Size(kernel_width, kernel_height), trait::kernel_type_id, kernel_data, kernel_step);
flip(kernel, kerFlip, -1);
pKerBuffer = kernelBuffer;
#endif
spec.allocate(specSize);
buffer.allocate(bufsize);
status = trait::runInit(pKerBuffer, kernelSize, 0, dataType, cn, ippRndFinancial, spec);
if (status < 0) {
return false; return false;
} }
IppiFilterBorder ippiFilterBorder = getIppFunc(src_type);
kernel_type borderValue[4] = { 0, 0, 0, 0 }; return true;
status = CV_INSTRUMENT_FUN_IPP(ippiFilterBorder, src_data, (int)src_step, dst_data, (int)dst_step, dstRoiSize, ippBorderType, borderValue, spec, buffer); #else
if (status >= 0) { CV_UNUSED(stype); CV_UNUSED(dtype); CV_UNUSED(kernel_type); CV_UNUSED(src_data); CV_UNUSED(src_step);
CV_IMPL_ADD(CV_IMPL_IPP); CV_UNUSED(dst_data); CV_UNUSED(dst_step); CV_UNUSED(width); CV_UNUSED(height); CV_UNUSED(full_width);
return true; CV_UNUSED(full_height); CV_UNUSED(offset_x); CV_UNUSED(offset_y); CV_UNUSED(kernel_data); CV_UNUSED(kernel_step);
} CV_UNUSED(kernel_width); CV_UNUSED(kernel_height); CV_UNUSED(anchor_x); CV_UNUSED(anchor_y); CV_UNUSED(delta);
CV_UNUSED(borderType); CV_UNUSED(isSubmatrix);
return false; return false;
#endif
} }
#endif #endif
@ -4902,34 +4820,18 @@ void filter2D(int stype, int dtype, int kernel_type,
delta, borderType, isSubmatrix); delta, borderType, isSubmatrix);
if (res) if (res)
return; return;
#ifdef HAVE_IPP
CV_IPP_CHECK() CV_IPP_RUN_FAST(ippFilter2D(stype, dtype, kernel_type,
{ src_data, src_step,
res = false; dst_data, dst_step,
if (kernel_type == CV_32FC1) { width, height,
res = ippFilter2D<CV_32F>(stype, dtype, full_width, full_height,
src_data, src_step, offset_x, offset_y,
dst_data, dst_step, kernel_data, kernel_step,
width, height, kernel_width, kernel_height,
kernel_data, kernel_step, anchor_x, anchor_y,
kernel_width, kernel_height, delta, borderType, isSubmatrix))
anchor_x, anchor_y,
delta, borderType, isSubmatrix);
}
else if (kernel_type == CV_16SC1) {
res = ippFilter2D<CV_16S>(stype, dtype,
src_data, src_step,
dst_data, dst_step,
width, height,
kernel_data, kernel_step,
kernel_width, kernel_height,
anchor_x, anchor_y,
delta, borderType, isSubmatrix);
}
if (res)
return;
}
#endif
res = dftFilter2D(stype, dtype, kernel_type, res = dftFilter2D(stype, dtype, kernel_type,
src_data, src_step, src_data, src_step,
dst_data, dst_step, dst_data, dst_step,

@ -370,6 +370,18 @@ void crossCorr( const Mat& src, const Mat& templ, Mat& dst,
Point anchor=Point(0,0), double delta=0, Point anchor=Point(0,0), double delta=0,
int borderType=BORDER_REFLECT_101 ); int borderType=BORDER_REFLECT_101 );
} }
#ifdef HAVE_IPP_IW
// Reports whether `anchor`, after being passed through cv::normalizeAnchor
// for the given kernel size, is the kernel centre point
// ((width-1)/2, (height-1)/2). Returns true for a centred anchor and false
// otherwise.
static inline bool ippiCheckAnchor(cv::Point anchor, cv::Size ksize)
{
    const cv::Point normalized = cv::normalizeAnchor(anchor, ksize);
    const bool centeredX = normalized.x == (ksize.width - 1) / 2;
    const bool centeredY = normalized.y == (ksize.height - 1) / 2;
    return centeredX && centeredY;
}
#endif
#endif #endif

@ -1194,10 +1194,8 @@ static IppiHistogram_C1 getIppiHistogramFunction_C1(int type)
{ {
IppiHistogram_C1 ippFunction = IppiHistogram_C1 ippFunction =
(type == CV_8UC1) ? (IppiHistogram_C1)ippiHistogram_8u_C1R : (type == CV_8UC1) ? (IppiHistogram_C1)ippiHistogram_8u_C1R :
#if IPP_VERSION_X100 >= 201700 || !(defined HAVE_IPP_ICV_ONLY)
(type == CV_16UC1) ? (IppiHistogram_C1)ippiHistogram_16u_C1R : (type == CV_16UC1) ? (IppiHistogram_C1)ippiHistogram_16u_C1R :
(type == CV_32FC1) ? (IppiHistogram_C1)ippiHistogram_32f_C1R : (type == CV_32FC1) ? (IppiHistogram_C1)ippiHistogram_32f_C1R :
#endif
NULL; NULL;
return ippFunction; return ippFunction;
@ -1401,9 +1399,9 @@ static bool ipp_calchist(const Mat &image, Mat &hist, int histSize, const float*
{ {
CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_IPP()
#if IPP_VERSION_X100 < 201801
// No SSE42 optimization for uniform 32f // No SSE42 optimization for uniform 32f
#if IPP_DISABLE_PERF_HISTU32F_SSE42 if(uniform && image.depth() == CV_32F && cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
if(uniform && image.depth() == CV_32F && !(ipp::getIppFeatures()&ippCPUID_AVX))
return false; return false;
#endif #endif

@ -2971,8 +2971,8 @@ public:
try try
{ {
::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, m_dst.m_size.width, range.end - range.start); ::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, m_dst.m_size.width, range.end - range.start);
CV_INSTRUMENT_FUN_IPP(iwiResize, &m_src, &m_dst, &roi); CV_INSTRUMENT_FUN_IPP(iwiResize, m_src, m_dst, ippBorderRepl, tile);
} }
catch(::ipp::IwException) catch(::ipp::IwException)
{ {
@ -3007,7 +3007,7 @@ public:
{0, scaleY, shift+0.5*scaleY} {0, scaleY, shift+0.5*scaleY}
}; };
iwiWarpAffine.InitAlloc(m_src.m_size, m_dst.m_size, m_src.m_dataType, m_src.m_channels, coeffs, ippWarpForward, inter, ::ipp::IwiWarpAffineParams(0, 0.75, 0), ippBorderRepl); iwiWarpAffine.InitAlloc(m_src.m_size, m_dst.m_size, m_src.m_dataType, m_src.m_channels, coeffs, iwTransForward, inter, ::ipp::IwiWarpAffineParams(0, 0, 0.75), ippBorderRepl);
m_ok = true; m_ok = true;
} }
@ -3021,8 +3021,8 @@ public:
try try
{ {
::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, m_dst.m_size.width, range.end - range.start); ::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, m_dst.m_size.width, range.end - range.start);
CV_INSTRUMENT_FUN_IPP(iwiWarpAffine, &m_src, &m_dst, &roi); CV_INSTRUMENT_FUN_IPP(iwiWarpAffine, m_src, m_dst, tile);
} }
catch(::ipp::IwException) catch(::ipp::IwException)
{ {
@ -3053,23 +3053,28 @@ static bool ipp_resize(const uchar * src_data, size_t src_step, int src_width, i
if(ippInter < 0) if(ippInter < 0)
return false; return false;
#if IPP_DISABLE_RESIZE_NEAREST // Resize which doesn't match OpenCV exactly
if(ippInter == ippNearest) if(!cv::ipp::useIPP_NE())
return false; {
#endif if(ippInter == ippNearest || ippInter == ippSuper || (ippDataType == ipp8u && ippInter == ippLinear))
return false;
#if IPP_DISABLE_RESIZE_AREA }
if(ippInter == ippSuper)
return false;
#endif
if(ippInter != ippLinear && ippDataType == ipp64f) if(ippInter != ippLinear && ippDataType == ipp64f)
return false; return false;
// Accuracy mismatch is 1 but affects detectors greatly #if IPP_VERSION_X100 < 201801
#if IPP_DISABLE_RESIZE_8U // Degradations on int^2 linear downscale
if(ippDataType == ipp8u && ippInter == ippLinear) if(ippDataType != ipp64f && ippInter == ippLinear && inv_scale_x < 1 && inv_scale_y < 1) // if downscale
return false; {
int scale_x = (int)(1/inv_scale_x);
int scale_y = (int)(1/inv_scale_y);
if(1/inv_scale_x - scale_x < DBL_EPSILON && 1/inv_scale_y - scale_y < DBL_EPSILON) // if integer
{
if(!(scale_x&(scale_x-1)) && !(scale_y&(scale_y-1))) // if power of 2
return false;
}
}
#endif #endif
bool affine = false; bool affine = false;

@ -571,6 +571,12 @@ static bool ipp_moments(Mat &src, Moments &m )
#if IPP_VERSION_X100 >= 900 #if IPP_VERSION_X100 >= 900
CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_IPP()
#if IPP_VERSION_X100 < 201801
// Degradations for CV_8UC1
if(src.type() == CV_8UC1)
return false;
#endif
IppiSize roi = { src.cols, src.rows }; IppiSize roi = { src.cols, src.rows };
IppiPoint point = { 0, 0 }; IppiPoint point = { 0, 0 };
int type = src.type(); int type = src.type();

@ -1140,20 +1140,41 @@ static bool ippMorph(int op, int src_type, int dst_type,
#ifdef HAVE_IPP_IW #ifdef HAVE_IPP_IW
CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_IPP()
// Problem with SSE42 optimizations #if IPP_VERSION_X100 < 201800
#if IPP_DISABLE_PERF_MORPH_SSE42 // Problem with SSE42 optimizations performance
if(!(ipp::getIppFeatures()&ippCPUID_AVX)) if(cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
return false;
// Different mask flipping
if(op == MORPH_GRADIENT)
return false;
#endif
#if IPP_VERSION_X100 < 201801
// Problem with AVX512 optimizations performance
if(cv::ipp::getIppTopFeatures()&ippCPUID_AVX512F)
return false;
// Multiple iterations on small mask is not effective in current integration
// In-place imitation for 3x3 kernel is not efficient
// Advanced morphology for small mask introduces degradations
if((iterations > 1 || src_data == dst_data || (op != MORPH_ERODE && op != MORPH_DILATE)) && kernel_width*kernel_height < 25)
return false;
// Skip even mask sizes for advanced morphology since they can produce out of spec writes
if((op != MORPH_ERODE && op != MORPH_DILATE) && (!(kernel_width&1) || !(kernel_height&1)))
return false; return false;
#endif #endif
::ipp::IwAutoBuffer<Ipp8u> kernelTempBuffer; IppAutoBuffer<Ipp8u> kernelTempBuffer;
::ipp::IwiBorderSize iwBorderSize; ::ipp::IwiBorderSize iwBorderSize;
::ipp::IwiBorderSize iwBorderSize2;
::ipp::IwiBorderType iwBorderType; ::ipp::IwiBorderType iwBorderType;
::ipp::IwiBorderType iwBorderType2;
::ipp::IwiImage iwMask; ::ipp::IwiImage iwMask;
::ipp::IwiImage iwInter; ::ipp::IwiImage iwInter;
::ipp::IwiSize initSize(width, height); ::ipp::IwiSize initSize(width, height);
::ipp::IwiSize kernelSize(kernel_width, kernel_height); ::ipp::IwiSize kernelSize(kernel_width, kernel_height);
::ipp::IwiPoint anchor(anchor_x, anchor_y);
IppDataType type = ippiGetDataType(CV_MAT_DEPTH(src_type)); IppDataType type = ippiGetDataType(CV_MAT_DEPTH(src_type));
int channels = CV_MAT_CN(src_type); int channels = CV_MAT_CN(src_type);
IwiMorphologyType morphType = ippiGetMorphologyType(op); IwiMorphologyType morphType = ippiGetMorphologyType(op);
@ -1169,68 +1190,99 @@ static bool ippMorph(int op, int src_type, int dst_type,
if(src_type != dst_type) if(src_type != dst_type)
return false; return false;
if(!ippiCheckAnchor(anchor_x, anchor_y, kernel_width, kernel_height))
return false;
try try
{ {
::ipp::IwiImage iwSrc(initSize, type, channels, ::ipp::IwiBorderSize(roi_x, roi_y, roi_width-roi_x-width, roi_height-roi_y-height), (void*)src_data, src_step); ::ipp::IwiImage iwSrc(initSize, type, channels, ::ipp::IwiBorderSize(roi_x, roi_y, roi_width-roi_x-width, roi_height-roi_y-height), (void*)src_data, src_step);
::ipp::IwiImage iwDst(initSize, type, channels, ::ipp::IwiBorderSize(roi_x2, roi_y2, roi_width2-roi_x2-width, roi_height2-roi_y2-height), (void*)dst_data, dst_step); ::ipp::IwiImage iwDst(initSize, type, channels, ::ipp::IwiBorderSize(roi_x2, roi_y2, roi_width2-roi_x2-width, roi_height2-roi_y2-height), (void*)dst_data, dst_step);
::ipp::iwiFilterMorphology_GetBorderSize(morphType, kernelSize, iwBorderSize); iwBorderSize = ::ipp::iwiSizeToBorderSize(kernelSize);
if(morphType != iwiMorphErode && morphType != iwiMorphDilate)
{
iwBorderSize.borderLeft /= 2;
iwBorderSize.borderTop /= 2;
iwBorderSize.borderRight /= 2;
iwBorderSize.borderBottom /= 2;
}
iwBorderType = ippiGetBorder(iwSrc, borderType, iwBorderSize); iwBorderType = ippiGetBorder(iwSrc, borderType, iwBorderSize);
if(!iwBorderType.m_borderType || ((iwBorderType.m_borderFlags&ippBorderInMem) && (iwBorderType.m_borderFlags&ippBorderInMem) != ippBorderInMem)) if(!iwBorderType)
return false; return false;
if(iterations > 1)
if(iwBorderType.m_borderType == ippBorderConst)
{ {
if(Vec<double, 4>(borderValue) == morphologyDefaultBorderValue()) // Check dst border for second and later iterations
iwBorderType.m_borderType = ippBorderDefault; iwBorderSize2 = ::ipp::iwiSizeToBorderSize(kernelSize);
else iwBorderType2 = ippiGetBorder(iwDst, borderType, iwBorderSize2);
iwBorderType.SetValue(borderValue[0], borderValue[1], borderValue[2], borderValue[3]); if(!iwBorderType2)
return false;
} }
if(morphType != iwiMorphErode && morphType != iwiMorphDilate)
if(morphType != iwiMorphErode && morphType != iwiMorphDilate && morphType != iwiMorphGradient)
{ {
if((iwBorderType.m_borderFlags&ippBorderInMem) == ippBorderInMem) // For now complex morphology support only InMem around all sides. This will be improved later.
iwBorderType.m_borderFlags = ippBorderFirstStageInMem; if((iwBorderType&ippBorderInMem) && (iwBorderType&ippBorderInMem) != ippBorderInMem)
return false;
if((iwBorderType&ippBorderInMem) == ippBorderInMem)
{
iwBorderType &= ~ippBorderInMem;
iwBorderType &= ippBorderFirstStageInMem;
}
} }
// Test input parameters on dummy structures if(iwBorderType.StripFlags() == ippBorderConst)
{ {
::ipp::IwiImage testSrc(initSize, type, channels); if(Vec<double, 4>(borderValue) == morphologyDefaultBorderValue())
::ipp::IwiImage testDst(initSize, type, channels); iwBorderType.SetType(ippBorderDefault);
::ipp::IwiImage testMask(ippiSize(kernel_width, kernel_height), ipp8u, CV_MAT_CN(kernel_type)); else
iwBorderType.m_value = ::ipp::IwValueFloat(borderValue[0], borderValue[1], borderValue[2], borderValue[3]);
::ipp::iwiFilterMorphology(&testSrc, &testDst, morphType, &testMask, &anchor, iwBorderType);
} }
iwMask.Init(ippiSize(kernel_width, kernel_height), ippiGetDataType(CV_MAT_DEPTH(kernel_type)), CV_MAT_CN(kernel_type), 0, kernel_data, kernel_step); iwMask.Init(ippiSize(kernel_width, kernel_height), ippiGetDataType(CV_MAT_DEPTH(kernel_type)), CV_MAT_CN(kernel_type), 0, kernel_data, kernel_step);
if((int)kernel_step != kernel_width || CV_MAT_DEPTH(kernel_type) != CV_8U)
::ipp::IwiImage iwMaskLoc = iwMask;
if(morphType == iwiMorphDilate)
{ {
kernelTempBuffer.Alloc(kernel_width*kernel_height); iwMaskLoc.Alloc(iwMask.m_size, iwMask.m_dataType, iwMask.m_channels);
::ipp::IwiImage iwMaskTmp(ippiSize(kernel_width, kernel_height), ipp8u, 1, 0, kernelTempBuffer, kernel_width); ::ipp::iwiMirror(iwMask, iwMaskLoc, ippAxsBoth);
::ipp::iwiScale(&iwMask, &iwMaskTmp, 1, 0); iwMask = iwMaskLoc;
iwMask = iwMaskTmp;
} }
if(iterations > 1) if(iterations > 1)
{ {
iwInter.Alloc(initSize, type, channels); // OpenCV uses in mem border from dst for two and more iterations, so we need to keep this border in intermediate image
iwInter.Alloc(initSize, type, channels, iwBorderSize2);
::ipp::IwiImage *pSwap[2] = {&iwInter, &iwDst}; ::ipp::IwiImage *pSwap[2] = {&iwInter, &iwDst};
::ipp::IwiBorderType iterBorder = iwBorderType; CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, iwSrc, iwInter, morphType, iwMask, ::ipp::IwDefault(), iwBorderType);
iterBorder.m_borderFlags = 0;
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, &iwSrc, &iwInter, morphType, &iwMask, NULL, iwBorderType); // Copy border only
{
if(iwBorderSize2.top)
{
::ipp::IwiRoi borderRoi(-iwBorderSize2.left, -iwBorderSize2.top, iwDst.m_size.width+iwBorderSize2.left+iwBorderSize2.right, iwBorderSize2.top);
::ipp::IwiImage iwInterRoi = iwInter.GetRoiImage(borderRoi);
::ipp::iwiCopy(iwDst.GetRoiImage(borderRoi), iwInterRoi);
}
if(iwBorderSize2.bottom)
{
::ipp::IwiRoi borderRoi(-iwBorderSize2.left, iwDst.m_size.height, iwDst.m_size.width+iwBorderSize2.left+iwBorderSize2.right, iwBorderSize2.bottom);
::ipp::IwiImage iwInterRoi = iwInter.GetRoiImage(borderRoi);
::ipp::iwiCopy(iwDst.GetRoiImage(borderRoi), iwInterRoi);
}
if(iwBorderSize2.left)
{
::ipp::IwiRoi borderRoi(-iwBorderSize2.left, 0, iwBorderSize2.left, iwDst.m_size.height);
::ipp::IwiImage iwInterRoi = iwInter.GetRoiImage(borderRoi);
::ipp::iwiCopy(iwDst.GetRoiImage(borderRoi), iwInterRoi);
}
if(iwBorderSize2.right)
{
::ipp::IwiRoi borderRoi(iwDst.m_size.width, 0, iwBorderSize2.left, iwDst.m_size.height);
::ipp::IwiImage iwInterRoi = iwInter.GetRoiImage(borderRoi);
::ipp::iwiCopy(iwDst.GetRoiImage(borderRoi), iwInterRoi);
}
}
iwBorderType2.SetType(iwBorderType);
for(int i = 0; i < iterations-1; i++) for(int i = 0; i < iterations-1; i++)
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, pSwap[i&0x1], pSwap[(i+1)&0x1], morphType, &iwMask, NULL, iterBorder); CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, *pSwap[i&0x1], *pSwap[(i+1)&0x1], morphType, iwMask, ::ipp::IwDefault(), iwBorderType2);
if(iterations&0x1) if(iterations&0x1)
CV_INSTRUMENT_FUN_IPP(::ipp::iwiCopyMask, &iwInter, &iwDst); CV_INSTRUMENT_FUN_IPP(::ipp::iwiCopy, iwInter, iwDst);
} }
else else
{ {
@ -1238,11 +1290,11 @@ static bool ippMorph(int op, int src_type, int dst_type,
{ {
iwInter.Alloc(initSize, type, channels); iwInter.Alloc(initSize, type, channels);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, &iwSrc, &iwInter, morphType, &iwMask, NULL, iwBorderType); CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, iwSrc, iwInter, morphType, iwMask, ::ipp::IwDefault(), iwBorderType);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiCopyMask, &iwInter, &iwDst); CV_INSTRUMENT_FUN_IPP(::ipp::iwiCopy, iwInter, iwDst);
} }
else else
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, &iwSrc, &iwDst, morphType, &iwMask, NULL, iwBorderType); CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, iwSrc, iwDst, morphType, iwMask, ::ipp::IwDefault(), iwBorderType);
} }
} }
catch(::ipp::IwException ex) catch(::ipp::IwException ex)
@ -1912,6 +1964,7 @@ static bool ocl_morphologyEx(InputArray _src, OutputArray _dst, int op,
} }
#endif #endif
#define IPP_DISABLE_MORPH_ADV 1
#ifdef HAVE_IPP #ifdef HAVE_IPP
#if !IPP_DISABLE_MORPH_ADV #if !IPP_DISABLE_MORPH_ADV
namespace cv { namespace cv {

@ -1729,80 +1729,47 @@ namespace cv
{ {
static bool ipp_boxfilter(Mat &src, Mat &dst, Size ksize, Point anchor, bool normalize, int borderType) static bool ipp_boxfilter(Mat &src, Mat &dst, Size ksize, Point anchor, bool normalize, int borderType)
{ {
#ifdef HAVE_IPP_IW
CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_IPP()
// Problem with SSE42 optimization for 16s #if IPP_VERSION_X100 < 201801
#if IPP_DISABLE_PERF_BOX16S_SSE42 // Problem with SSE42 optimization for 16s and some 8u modes
if(src.depth() == CV_16S && !(ipp::getIppFeatures()&ippCPUID_AVX)) if(ipp::getIppTopFeatures() == ippCPUID_SSE42 && (((src.depth() == CV_16S || src.depth() == CV_16U) && (src.channels() == 3 || src.channels() == 4)) || (src.depth() == CV_8U && src.channels() == 3 && (ksize.width > 5 || ksize.height > 5))))
return false; return false;
#endif
int stype = src.type(), cn = CV_MAT_CN(stype); // Other optimizations has some degradations too
IppiBorderType ippBorderType = ippiGetBorderType(borderType & ~BORDER_ISOLATED); if((((src.depth() == CV_16S || src.depth() == CV_16U) && (src.channels() == 4)) || (src.depth() == CV_8U && src.channels() == 1 && (ksize.width > 5 || ksize.height > 5))))
IppDataType ippType = ippiGetDataType(stype); return false;
Point ocvAnchor, ippAnchor; #endif
ocvAnchor.x = anchor.x < 0 ? ksize.width / 2 : anchor.x;
ocvAnchor.y = anchor.y < 0 ? ksize.height / 2 : anchor.y;
ippAnchor.x = ksize.width / 2 - (ksize.width % 2 == 0 ? 1 : 0);
ippAnchor.y = ksize.height / 2 - (ksize.height % 2 == 0 ? 1 : 0);
if(normalize && (!src.isSubmatrix() || borderType&BORDER_ISOLATED) && stype == dst.type() &&
(ippBorderType == ippBorderRepl || /* returns ippStsStepErr: Step value is not valid */
ippBorderType == ippBorderConst ||
ippBorderType == ippBorderMirror) && ocvAnchor == ippAnchor) // returns ippStsMaskSizeErr: mask has an illegal value
{
IppStatus status;
Ipp32s bufSize = 0;
IppiSize roiSize = { dst.cols, dst.rows };
IppiSize maskSize = { ksize.width, ksize.height };
IppAutoBuffer<Ipp8u> buffer;
if(ippiFilterBoxBorderGetBufferSize(roiSize, maskSize, ippType, cn, &bufSize) < 0)
return false;
buffer.allocate(bufSize); if(!normalize)
return false;
#define IPP_FILTER_BOX_BORDER(ippType, flavor)\ if(!ippiCheckAnchor(anchor, ksize))
{\ return false;
ippType borderValue[4] = { 0, 0, 0, 0 };\
status = CV_INSTRUMENT_FUN_IPP(ippiFilterBoxBorder_##flavor, src.ptr<ippType>(), (int)src.step, dst.ptr<ippType>(),\
(int)dst.step, roiSize, maskSize,\
ippBorderType, borderValue, buffer);\
}
if (stype == CV_8UC1) try
IPP_FILTER_BOX_BORDER(Ipp8u, 8u_C1R) {
else if (stype == CV_8UC3) ::ipp::IwiImage iwSrc = ippiGetImage(src);
IPP_FILTER_BOX_BORDER(Ipp8u, 8u_C3R) ::ipp::IwiImage iwDst = ippiGetImage(dst);
else if (stype == CV_8UC4) ::ipp::IwiSize iwKSize = ippiGetSize(ksize);
IPP_FILTER_BOX_BORDER(Ipp8u, 8u_C4R) ::ipp::IwiBorderSize borderSize(iwKSize);
else if (stype == CV_16UC1) ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
IPP_FILTER_BOX_BORDER(Ipp16u, 16u_C1R) if(!ippBorder)
else if (stype == CV_16UC3)
IPP_FILTER_BOX_BORDER(Ipp16u, 16u_C3R)
else if (stype == CV_16UC4)
IPP_FILTER_BOX_BORDER(Ipp16u, 16u_C4R)
else if (stype == CV_16SC1)
IPP_FILTER_BOX_BORDER(Ipp16s, 16s_C1R)
else if (stype == CV_16SC3)
IPP_FILTER_BOX_BORDER(Ipp16s, 16s_C3R)
else if (stype == CV_16SC4)
IPP_FILTER_BOX_BORDER(Ipp16s, 16s_C4R)
else if (stype == CV_32FC1)
IPP_FILTER_BOX_BORDER(Ipp32f, 32f_C1R)
else if (stype == CV_32FC3)
IPP_FILTER_BOX_BORDER(Ipp32f, 32f_C3R)
else if (stype == CV_32FC4)
IPP_FILTER_BOX_BORDER(Ipp32f, 32f_C4R)
else
return false; return false;
if(status >= 0) CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBox, iwSrc, iwDst, iwKSize, ::ipp::IwDefault(), ippBorder);
return true; }
catch (::ipp::IwException)
{
return false;
} }
#undef IPP_FILTER_BOX_BORDER
return true;
#else
CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(anchor); CV_UNUSED(normalize); CV_UNUSED(borderType);
return false; return false;
#endif
} }
} }
#endif #endif
@ -2241,8 +2208,11 @@ static bool openvx_gaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
#endif #endif
#ifdef HAVE_IPP #ifdef HAVE_IPP
#define IPP_DISABLE_FILTERING_INMEM_PARTIAL 1 // IW 2017u2 has bug which doesn't allow use of partial inMem with tiling #if IPP_VERSION_X100 == 201702 // IW 2017u2 has bug which doesn't allow use of partial inMem with tiling
#define IPP_GAUSSIANBLUR_PARALLEL 0
#else
#define IPP_GAUSSIANBLUR_PARALLEL 1 #define IPP_GAUSSIANBLUR_PARALLEL 1
#endif
#ifdef HAVE_IPP_IW #ifdef HAVE_IPP_IW
@ -2266,8 +2236,8 @@ public:
try try
{ {
::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, m_dst.m_size.width, range.end - range.start); ::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, m_dst.m_size.width, range.end - range.start);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, &m_src, &m_dst, m_kernelSize, m_sigma, m_border, &roi); CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, m_src, m_dst, m_kernelSize, m_sigma, ::ipp::IwDefault(), m_border, tile);
} }
catch(::ipp::IwException e) catch(::ipp::IwException e)
{ {
@ -2295,7 +2265,7 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
#ifdef HAVE_IPP_IW #ifdef HAVE_IPP_IW
CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_IPP()
#if IPP_VERSION_X100 <= 201702 && ((defined _MSC_VER && defined _M_IX86) || (defined __GNUC__ && defined __i386__)) #if IPP_VERSION_X100 < 201800 && ((defined _MSC_VER && defined _M_IX86) || (defined __GNUC__ && defined __i386__))
CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType); CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType);
return false; // bug on ia32 return false; // bug on ia32
#else #else
@ -2313,17 +2283,15 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
{ {
Mat src = _src.getMat(); Mat src = _src.getMat();
Mat dst = _dst.getMat(); Mat dst = _dst.getMat();
::ipp::IwiImage iwSrc = ippiGetImage(src); ::ipp::IwiImage iwSrc = ippiGetImage(src);
::ipp::IwiImage iwDst = ippiGetImage(dst); ::ipp::IwiImage iwDst = ippiGetImage(dst);
::ipp::IwiBorderSize borderSize(::ipp::IwiSize(ippiSize(ksize))); ::ipp::IwiBorderSize borderSize = ::ipp::iwiSizeToBorderSize(ippiGetSize(ksize));
::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize)); ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
if(!ippBorder.m_borderType) if(!ippBorder)
return false; return false;
const bool disableThreading = IPP_DISABLE_FILTERING_INMEM_PARTIAL &&
((ippBorder.m_borderFlags)&ippBorderInMem) && ((ippBorder.m_borderFlags)&ippBorderInMem) != ippBorderInMem;
const int threads = ippiSuggestThreadsNum(iwDst, 2); const int threads = ippiSuggestThreadsNum(iwDst, 2);
if(!disableThreading && IPP_GAUSSIANBLUR_PARALLEL && threads > 1) { if(IPP_GAUSSIANBLUR_PARALLEL && threads > 1) {
bool ok; bool ok;
ipp_gaussianBlurParallel invoker(iwSrc, iwDst, ksize.width, (float) sigma1, ippBorder, &ok); ipp_gaussianBlurParallel invoker(iwSrc, iwDst, ksize.width, (float) sigma1, ippBorder, &ok);
@ -2335,7 +2303,7 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
if(!ok) if(!ok)
return false; return false;
} else { } else {
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, &iwSrc, &iwDst, ksize.width, (float) sigma1, ippBorder); CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, iwSrc, iwDst, ksize.width, sigma1, ::ipp::IwDefault(), ippBorder);
} }
} }
catch (::ipp::IwException ex) catch (::ipp::IwException ex)
@ -3411,6 +3379,12 @@ static bool ipp_medianFilter(Mat &src0, Mat &dst, int ksize)
{ {
CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_IPP()
#if IPP_VERSION_X100 < 201801
// Degradations for big kernel
if(ksize > 7)
return false;
#endif
{ {
int bufSize; int bufSize;
IppiSize dstRoiSize = ippiSize(dst.cols, dst.rows), maskSize = ippiSize(ksize, ksize); IppiSize dstRoiSize = ippiSize(dst.cols, dst.rows), maskSize = ippiSize(ksize, ksize);
@ -4279,8 +4253,8 @@ public:
try try
{ {
::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, dst.m_size.width, range.end - range.start); ::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, dst.m_size.width, range.end - range.start);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, &src, &dst, radius, valSquareSigma, posSquareSigma, ippiFilterBilateralGauss, ippDistNormL1, borderType, &roi); CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, src, dst, radius, valSquareSigma, posSquareSigma, ::ipp::IwDefault(), borderType, tile);
} }
catch(::ipp::IwException) catch(::ipp::IwException)
{ {
@ -4318,13 +4292,11 @@ static bool ipp_bilateralFilter(Mat &src, Mat &dst, int d, double sigmaColor, do
::ipp::IwiImage iwDst = ippiGetImage(dst); ::ipp::IwiImage iwDst = ippiGetImage(dst);
::ipp::IwiBorderSize borderSize(radius); ::ipp::IwiBorderSize borderSize(radius);
::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize)); ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
if(!ippBorder.m_borderType) if(!ippBorder)
return false; return false;
const bool disableThreading = IPP_DISABLE_FILTERING_INMEM_PARTIAL &&
((ippBorder.m_borderFlags)&ippBorderInMem) && ((ippBorder.m_borderFlags)&ippBorderInMem) != ippBorderInMem;
const int threads = ippiSuggestThreadsNum(iwDst, 2); const int threads = ippiSuggestThreadsNum(iwDst, 2);
if(!disableThreading && IPP_BILATERAL_PARALLEL && threads > 1) { if(IPP_BILATERAL_PARALLEL && threads > 1) {
bool ok = true; bool ok = true;
Range range(0, (int)iwDst.m_size.height); Range range(0, (int)iwDst.m_size.height);
ipp_bilateralFilterParallel invoker(iwSrc, iwDst, radius, valSquareSigma, posSquareSigma, ippBorder, &ok); ipp_bilateralFilterParallel invoker(iwSrc, iwDst, radius, valSquareSigma, posSquareSigma, ippBorder, &ok);
@ -4336,7 +4308,7 @@ static bool ipp_bilateralFilter(Mat &src, Mat &dst, int d, double sigmaColor, do
if(!ok) if(!ok)
return false; return false;
} else { } else {
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, &iwSrc, &iwDst, radius, valSquareSigma, posSquareSigma, ippiFilterBilateralGauss, ippDistNormL1, ippBorder); CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, iwSrc, iwDst, radius, valSquareSigma, posSquareSigma, ::ipp::IwDefault(), ippBorder);
} }
} }
catch (::ipp::IwException) catch (::ipp::IwException)

@ -962,19 +962,18 @@ static bool ipp_getThreshVal_Otsu_8u( const unsigned char* _src, int step, Size
{ {
CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_IPP()
#if IPP_VERSION_X100 >= 810 // Performance degradations
int ippStatus = -1; #if IPP_VERSION_X100 >= 201800
IppiSize srcSize = { size.width, size.height }; IppiSize srcSize = { size.width, size.height };
CV_SUPPRESS_DEPRECATED_START
ippStatus = CV_INSTRUMENT_FUN_IPP(ippiComputeThreshold_Otsu_8u_C1R, _src, step, srcSize, &thresh);
CV_SUPPRESS_DEPRECATED_END
if(ippStatus >= 0) if(CV_INSTRUMENT_FUN_IPP(ippiComputeThreshold_Otsu_8u_C1R, _src, step, srcSize, &thresh) < 0)
return true; return false;
return true;
#else #else
CV_UNUSED(_src); CV_UNUSED(step); CV_UNUSED(size); CV_UNUSED(thresh); CV_UNUSED(_src); CV_UNUSED(step); CV_UNUSED(size); CV_UNUSED(thresh);
#endif
return false; return false;
#endif
} }
#endif #endif
@ -992,7 +991,7 @@ getThreshVal_Otsu_8u( const Mat& _src )
#ifdef HAVE_IPP #ifdef HAVE_IPP
unsigned char thresh; unsigned char thresh;
CV_IPP_RUN(IPP_VERSION_X100 >= 810, ipp_getThreshVal_Otsu_8u(_src.ptr(), step, size, thresh), thresh); CV_IPP_RUN_FAST(ipp_getThreshVal_Otsu_8u(_src.ptr(), step, size, thresh), thresh);
#endif #endif
const int N = 256; const int N = 256;

@ -136,7 +136,7 @@ icvReleaseHidHaarClassifierCascade( CvHidHaarClassifierCascade** _cascade )
for( i = 0; i < cascade->count; i++ ) for( i = 0; i < cascade->count; i++ )
{ {
if( cascade->ipp_stages[i] ) if( cascade->ipp_stages[i] )
#if IPP_VERSION_X100 < 900 #if IPP_VERSION_X100 < 900 && !IPP_DISABLE_HAAR
ippiHaarClassifierFree_32f( (IppiHaarClassifier_32f*)cascade->ipp_stages[i] ); ippiHaarClassifierFree_32f( (IppiHaarClassifier_32f*)cascade->ipp_stages[i] );
#else #else
cvFree(&cascade->ipp_stages[i]); cvFree(&cascade->ipp_stages[i]);

@ -339,7 +339,7 @@ typedef struct ImplData
// convert flags register to more handy variables // convert flags register to more handy variables
void flagsToVars(int flags) void flagsToVars(int flags)
{ {
#if defined(HAVE_IPP_ICV_ONLY) #if defined(HAVE_IPP_ICV)
ipp = 0; ipp = 0;
icv = ((flags&CV_IMPL_IPP) > 0); icv = ((flags&CV_IMPL_IPP) > 0);
#else #else

@ -3078,6 +3078,16 @@ void printVersionInfo(bool useStdOut)
::testing::Test::RecordProperty("cv_tegra_optimization", tegra_optimization); ::testing::Test::RecordProperty("cv_tegra_optimization", tegra_optimization);
if (useStdOut) std::cout << "Tegra optimization: " << tegra_optimization << std::endl; if (useStdOut) std::cout << "Tegra optimization: " << tegra_optimization << std::endl;
#endif #endif
#ifdef HAVE_IPP
const char * ipp_optimization = cv::ipp::useIPP()? "enabled" : "disabled";
::testing::Test::RecordProperty("cv_ipp_optimization", ipp_optimization);
if (useStdOut) std::cout << "Intel(R) IPP optimization: " << ipp_optimization << std::endl;
cv::String ippVer = cv::ipp::getIppVersion();
::testing::Test::RecordProperty("cv_ipp_version", ippVer);
if(useStdOut) std::cout << "Intel(R) IPP version: " << ippVer.c_str() << std::endl;
#endif
} }

Loading…
Cancel
Save