Merge pull request #12877 from maver1:3.4

* Updated ICV packages and IPP integration

* core(test): minMaxIdx IPP regression test

* core(ipp): workaround minMaxIdx problem

* core(ipp): workaround meanStdDev() CV_32FC3 buffer overrun

* Returned semicolon after CV_INSTRUMENT_REGION_IPP()
pull/12994/head
maver1 6 years ago committed by Alexander Alekhin
parent c8fc8d210b
commit e397434cb6
  1. 26
      3rdparty/ippicv/ippicv.cmake
  2. 7
      cmake/OpenCVFindIPP.cmake
  3. 115
      cmake/OpenCVFindIPPIW.cmake
  4. 1
      cmake/templates/cvconfig.h.in
  5. 6
      modules/core/include/opencv2/core/base.hpp
  6. 4
      modules/core/include/opencv2/core/private.hpp
  7. 4
      modules/core/src/channels.cpp
  8. 6
      modules/core/src/copy.cpp
  9. 5
      modules/core/src/mean.cpp
  10. 2
      modules/core/src/merge.cpp
  11. 6
      modules/core/src/minmax.cpp
  12. 2
      modules/core/src/split.cpp
  13. 25
      modules/core/src/system.cpp
  14. 26
      modules/core/test/test_arithm.cpp
  15. 2
      modules/imgproc/src/filter.cpp
  16. 2
      modules/imgproc/src/resize.cpp
  17. 239
      modules/objdetect/src/haar.cpp

@ -2,37 +2,37 @@ function(download_ippicv root_var)
set(${root_var} "" PARENT_SCOPE)
# Commit SHA in the opencv_3rdparty repo
set(IPPICV_COMMIT "bdb7bb85f34a8cb0d35e40a81f58da431aa1557a")
set(IPPICV_COMMIT "32e315a5b106a7b89dbed51c28f8120a48b368b4")
# Define actual ICV versions
if(APPLE)
set(OPENCV_ICV_PLATFORM "macosx")
set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_mac")
if(X86_64)
set(OPENCV_ICV_NAME "ippicv_2017u3_mac_intel64_general_20180518.tgz")
set(OPENCV_ICV_HASH "3ae52b9be0fe73dd45bc5e9429cd3732")
set(OPENCV_ICV_NAME "ippicv_2019_mac_intel64_general_20180723.tgz")
set(OPENCV_ICV_HASH "fe6b2bb75ae0e3f19ad3ae1a31dfa4a2")
else()
set(OPENCV_ICV_NAME "ippicv_2017u3_mac_ia32_general_20180518.tgz")
set(OPENCV_ICV_HASH "698660b975b62bee3ef6c5af51e97544")
set(OPENCV_ICV_NAME "ippicv_2019_mac_ia32_general_20180723.tgz")
set(OPENCV_ICV_HASH "b5dfa78c87eb75c64470cbe5ec876f4f")
endif()
elseif((UNIX AND NOT ANDROID) OR (UNIX AND ANDROID_ABI MATCHES "x86"))
set(OPENCV_ICV_PLATFORM "linux")
set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_lnx")
if(X86_64)
set(OPENCV_ICV_NAME "ippicv_2017u3_lnx_intel64_general_20180518.tgz")
set(OPENCV_ICV_HASH "b7cc351267db2d34b9efa1cd22ff0572")
set(OPENCV_ICV_NAME "ippicv_2019_lnx_intel64_general_20180723.tgz")
set(OPENCV_ICV_HASH "c0bd78adb4156bbf552c1dfe90599607")
else()
set(OPENCV_ICV_NAME "ippicv_2017u3_lnx_ia32_general_20180518.tgz")
set(OPENCV_ICV_HASH "ea72de74dae3c604eb6348395366e78e")
set(OPENCV_ICV_NAME "ippicv_2019_lnx_ia32_general_20180723.tgz")
set(OPENCV_ICV_HASH "4f38432c30bfd6423164b7a24bbc98a0")
endif()
elseif(WIN32 AND NOT ARM)
set(OPENCV_ICV_PLATFORM "windows")
set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_win")
if(X86_64)
set(OPENCV_ICV_NAME "ippicv_2017u3_win_intel64_general_20180518.zip")
set(OPENCV_ICV_HASH "915ff92958089ede8ea532d3c4fe7187")
set(OPENCV_ICV_NAME "ippicv_2019_win_intel64_20180723_general.zip")
set(OPENCV_ICV_HASH "1d222685246896fe089f88b8858e4b2f")
else()
set(OPENCV_ICV_NAME "ippicv_2017u3_win_ia32_general_20180518.zip")
set(OPENCV_ICV_HASH "928168c2d99ab284047dfcfb7a821d91")
set(OPENCV_ICV_NAME "ippicv_2019_win_ia32_20180723_general.zip")
set(OPENCV_ICV_HASH "0157251a2eb9cd63a3ebc7eed0f3e59e")
endif()
else()
return()

@ -240,11 +240,12 @@ endif()
if(NOT DEFINED IPPROOT)
include("${OpenCV_SOURCE_DIR}/3rdparty/ippicv/ippicv.cmake")
download_ippicv(IPPROOT)
if(NOT IPPROOT)
download_ippicv(ICV_PACKAGE_ROOT)
if(NOT ICV_PACKAGE_ROOT)
return()
endif()
ocv_install_3rdparty_licenses(ippicv "${IPPROOT}/readme.htm" "${IPPROOT}/license/ippEULA.txt")
set(IPPROOT "${ICV_PACKAGE_ROOT}/icv")
ocv_install_3rdparty_licenses(ippicv "${IPPROOT}/readme.htm" "${ICV_PACKAGE_ROOT}/EULA.txt")
endif()
file(TO_CMAKE_PATH "${IPPROOT}" __IPPROOT)

@ -1,17 +1,19 @@
#
# The script to detect Intel(R) Integrated Performance Primitives Integration Wrappers (IPP IW)
# The script to detect Intel(R) Integrated Performance Primitives Integration Wrappers (IPP Integration Wrappers)
# installation/package
#
#
# On return this will define:
#
# HAVE_IPP_IW - True if Intel IPP found
# IPP_IW_PATH - Root of Intel IPP IW directory
# IPP_IW_LIBRARIES - Intel IPP IW libraries
# IPP_IW_INCLUDES - Intel IPP IW include folder
# HAVE_IPP_IW - True if Intel IPP Integration Wrappers found
# HAVE_IPP_IW_LL - True if Intel IPP Integration Wrappers found with Low Level API header
# IPP_IW_PATH - Root of Intel IPP Integration Wrappers directory
# IPP_IW_LIBRARIES - Intel IPP Integration Wrappers libraries
# IPP_IW_INCLUDES - Intel IPP Integration Wrappers include folder
#
unset(HAVE_IPP_IW CACHE)
unset(HAVE_IPP_IW_LL CACHE)
unset(IPP_IW_PATH)
unset(IPP_IW_LIBRARIES)
unset(IPP_IW_INCLUDES)
@ -29,13 +31,16 @@ macro(ippiw_debugmsg MESSAGE)
endmacro()
file(TO_CMAKE_PATH "${IPPROOT}" IPPROOT)
# This function detects Intel IPP IW version by analyzing .h file
# This function detects Intel IPP Integration Wrappers version by analyzing .h file
macro(ippiw_setup PATH BUILD)
set(FILE "${PATH}/include/iw/iw_ll.h") # check if Intel IPP IW is OpenCV specific
ippiw_debugmsg("Checking path: ${PATH}")
set(FILE "${PATH}/include/iw/iw_version.h")
if(${BUILD})
ippiw_debugmsg("Checking sources: ${PATH}")
else()
ippiw_debugmsg("Checking binaries: ${PATH}")
endif()
if(EXISTS "${FILE}")
set(FILE "${PATH}/include/iw/iw_version.h")
ippiw_debugmsg("vfile\tok")
ippiw_debugmsg("vfile\tfound")
file(STRINGS "${FILE}" IW_VERSION_MAJOR REGEX "IW_VERSION_MAJOR")
file(STRINGS "${FILE}" IW_VERSION_MINOR REGEX "IW_VERSION_MINOR")
file(STRINGS "${FILE}" IW_VERSION_UPDATE REGEX "IW_VERSION_UPDATE")
@ -56,13 +61,13 @@ macro(ippiw_setup PATH BUILD)
math(EXPR IW_MIN_COMPATIBLE_IPP_EXP "${IW_MIN_COMPATIBLE_IPP_MAJOR}*10000 + ${IW_MIN_COMPATIBLE_IPP_MINOR}*100 + ${IW_MIN_COMPATIBLE_IPP_UPDATE}")
if((IPP_VERSION_EXP GREATER IW_MIN_COMPATIBLE_IPP_EXP) OR (IPP_VERSION_EXP EQUAL IW_MIN_COMPATIBLE_IPP_EXP))
ippiw_debugmsg("version\tok")
ippiw_debugmsg("vcheck\tpassed")
if(${BUILD})
# check sources
if(EXISTS "${PATH}/src/iw_core.c")
ippiw_debugmsg("sources\tok")
ippiw_debugmsg("sources\tyes")
set(IPP_IW_PATH "${PATH}")
message(STATUS "found Intel IPP IW sources: ${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE}")
message(STATUS "found Intel IPP Integration Wrappers sources: ${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE}")
message(STATUS "at: ${IPP_IW_PATH}")
set(IPP_IW_LIBRARY ippiw)
@ -72,7 +77,13 @@ macro(ippiw_setup PATH BUILD)
add_subdirectory("${IPP_IW_PATH}/" ${OpenCV_BINARY_DIR}/3rdparty/ippiw)
set(HAVE_IPP_IW 1)
set(FILE "${PATH}/include/iw/iw_ll.h") # check if Intel IPP Integration Wrappers is OpenCV specific
if(EXISTS "${FILE}")
set(HAVE_IPP_IW_LL 1)
endif()
return()
else()
ippiw_debugmsg("sources\tno")
endif()
else()
# check binaries
@ -82,9 +93,9 @@ macro(ippiw_setup PATH BUILD)
set(FILE "${PATH}/lib/ia32/${CMAKE_STATIC_LIBRARY_PREFIX}ipp_iw${CMAKE_STATIC_LIBRARY_SUFFIX}")
endif()
if(EXISTS ${FILE})
ippiw_debugmsg("binaries\tok (64=${IPP_X64})")
ippiw_debugmsg("binaries\tyes (64=${IPP_X64})")
set(IPP_IW_PATH "${PATH}")
message(STATUS "found Intel IPP IW binaries: ${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE}")
message(STATUS "found Intel IPP Integration Wrappers binaries: ${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE}")
message(STATUS "at: ${IPP_IW_PATH}")
add_library(ippiw STATIC IMPORTED)
@ -105,81 +116,77 @@ macro(ippiw_setup PATH BUILD)
set(HAVE_IPP_IW 1)
set(BUILD_IPP_IW 0)
set(FILE "${PATH}/include/iw/iw_ll.h") # check if Intel IPP Integration Wrappers is OpenCV specific
if(EXISTS "${FILE}")
set(HAVE_IPP_IW_LL 1)
endif()
return()
else()
ippiw_debugmsg("binaries\tno")
endif()
endif()
else()
ippiw_debugmsg("vcheck\tfailed")
endif()
else()
ippiw_debugmsg("vfile\tnot found")
endif()
set(HAVE_IPP_IW 0)
set(HAVE_IPP_IW_LL 0)
endmacro()
# check os and architecture
if(APPLE)
set(IW_PACKAGE_SUBDIR "ippiw_mac")
elseif((UNIX AND NOT ANDROID) OR (UNIX AND ANDROID_ABI MATCHES "x86"))
set(IW_PACKAGE_SUBDIR "ippiw_lnx")
elseif(WIN32 AND NOT ARM)
set(IW_PACKAGE_SUBDIR "ippiw_win")
else()
message(SEND_ERROR "Improper system for Intel IPP Integrations Wrappers. This message shouldn't appear. Check Intel IPP configurations steps")
return()
endif()
# check build options first
if(BUILD_IPP_IW)
# custom path
if(DEFINED IPPIWROOT)
ippiw_setup("${IPPIWROOT}/" 1)
message(STATUS "Can't find Intel IPP IW sources at: ${IPPIWROOT}")
message(STATUS "Can't find Intel IPP Integration Wrappers sources at: ${IPPIWROOT}")
endif()
# local sources
ippiw_setup("${OpenCV_SOURCE_DIR}/3rdparty/ippiw" 1)
set(IPPIW_ROOT "${IPPROOT}/../${IW_PACKAGE_SUBDIR}")
set(IPPIW_ROOT "${IPPROOT}/../iw")
ocv_install_3rdparty_licenses(ippiw
"${IPPIW_ROOT}/EULA.txt"
"${IPPIW_ROOT}/redist.txt"
"${IPPIW_ROOT}/support.txt"
"${IPPIW_ROOT}/third-party-programs.txt")
"${IPPIW_ROOT}/../EULA.txt"
"${IPPIW_ROOT}/../support.txt"
"${IPPIW_ROOT}/../third-party-programs.txt")
# Package sources
get_filename_component(__PATH "${IPPROOT}/../${IW_PACKAGE_SUBDIR}/" ABSOLUTE)
get_filename_component(__PATH "${IPPROOT}/../iw/" ABSOLUTE)
ippiw_setup("${__PATH}" 1)
# take Intel IPP Integration Wrappers from ICV package
if(NOT HAVE_IPP_ICV)
message(STATUS "Cannot find Intel IPP Integration Wrappers. Checking \"Intel IPP for OpenCV\" package")
set(TEMP_ROOT 0)
include("${OpenCV_SOURCE_DIR}/3rdparty/ippicv/ippicv.cmake")
download_ippicv(TEMP_ROOT)
set(IPPIW_ROOT "${TEMP_ROOT}/iw/")
ocv_install_3rdparty_licenses(ippiw
"${IPPIW_ROOT}/../EULA.txt"
"${IPPIW_ROOT}/../support.txt"
"${IPPIW_ROOT}/../third-party-programs.txt")
ippiw_setup("${IPPIW_ROOT}" 1)
endif()
endif()
# custom binaries
if(DEFINED IPPIWROOT)
ippiw_setup("${IPPIWROOT}/" 0)
message(STATUS "Can't find Intel IPP IW sources at: ${IPPIWROOT}")
message(STATUS "Can't find Intel IPP Integration Wrappers binaries at: ${IPPIWROOT}")
endif()
# check binaries in IPP folder
ippiw_setup("${IPPROOT}/" 0)
# check binaries near IPP folder
ippiw_setup("${IPPROOT}/../${IW_PACKAGE_SUBDIR}/" 0)
# take Intel IPP IW from ICV package
if(NOT HAVE_IPP_ICV AND BUILD_IPP_IW)
message(STATUS "Cannot find Intel IPP IW. Checking \"Intel IPP for OpenCV\" package")
set(TEMP_ROOT 0)
include("${OpenCV_SOURCE_DIR}/3rdparty/ippicv/ippicv.cmake")
download_ippicv(TEMP_ROOT)
set(IPPIW_ROOT "${TEMP_ROOT}/../${IW_PACKAGE_SUBDIR}")
ocv_install_3rdparty_licenses(ippiw
"${IPPIW_ROOT}/EULA.txt"
"${IPPIW_ROOT}/redist.txt"
"${IPPIW_ROOT}/support.txt"
"${IPPIW_ROOT}/third-party-programs.txt")
# Package sources. Only sources are compatible with regular Intel IPP
ippiw_setup("${IPPIW_ROOT}" 1)
endif()
ippiw_setup("${IPPROOT}/../iw/" 0)
set(HAVE_IPP_IW 0)
set(HAVE_IPP_IW_LL 0)
message(STATUS "Cannot find Intel IPP Integration Wrappers, optimizations will be limited. Use IPPIWROOT to set custom location")
return()

@ -105,6 +105,7 @@
#cmakedefine HAVE_IPP
#cmakedefine HAVE_IPP_ICV
#cmakedefine HAVE_IPP_IW
#cmakedefine HAVE_IPP_IW_LL
/* JPEG-2000 codec */
#cmakedefine HAVE_JASPER

@ -767,9 +767,13 @@ CV_EXPORTS_W void setUseIPP(bool flag);
CV_EXPORTS_W String getIppVersion();
// IPP Not-Exact mode. This function may force use of IPP then both IPP and OpenCV provide proper results
// but have internal accuracy differences which have to much direct or indirect impact on accuracy tests.
// but have internal accuracy differences which have too much direct or indirect impact on accuracy tests.
CV_EXPORTS_W bool useIPP_NotExact();
CV_EXPORTS_W void setUseIPP_NotExact(bool flag);
#if OPENCV_ABI_COMPATIBILITY < 400
CV_EXPORTS_W bool useIPP_NE();
CV_EXPORTS_W void setUseIPP_NE(bool flag);
#endif
} // ipp

@ -194,8 +194,8 @@ T* allocSingleton(size_t count = 1) { return static_cast<T*>(allocSingletonBuffe
#define IPP_DISABLE_LAB_RGB 1 // breaks OCL accuracy tests
#define IPP_DISABLE_RGB_XYZ 1 // big accuracy difference
#define IPP_DISABLE_XYZ_RGB 1 // big accuracy difference
#define IPP_DISABLE_HAAR 1 // improper integration/results
#define IPP_DISABLE_HOUGH 1 // improper integration/results
#define IPP_DISABLE_FILTER2D_BIG_MASK 1 // different results on masks > 7x7
#define IPP_DISABLE_GAUSSIANBLUR_PARALLEL 1 // not supported (2017u2 / 2017u3)
@ -229,7 +229,9 @@ T* allocSingleton(size_t count = 1) { return static_cast<T*>(allocSingletonBuffe
# pragma GCC diagnostic ignored "-Wsuggest-override"
# endif
#include "iw++/iw.hpp"
# ifdef HAVE_IPP_IW_LL
#include "iw/iw_ll.h"
# endif
# if defined(__OPENCV_BUILD) && defined(__GNUC__) && __GNUC__ >= 5
# pragma GCC diagnostic pop
# endif

@ -341,7 +341,7 @@ namespace cv
{
static bool ipp_extractChannel(const Mat &src, Mat &dst, int channel)
{
#ifdef HAVE_IPP_IW
#ifdef HAVE_IPP_IW_LL
CV_INSTRUMENT_REGION_IPP();
int srcChannels = src.channels();
@ -379,7 +379,7 @@ static bool ipp_extractChannel(const Mat &src, Mat &dst, int channel)
static bool ipp_insertChannel(const Mat &src, Mat &dst, int channel)
{
#ifdef HAVE_IPP_IW
#ifdef HAVE_IPP_IW_LL
CV_INSTRUMENT_REGION_IPP();
int srcChannels = src.channels();

@ -328,7 +328,7 @@ void Mat::copyTo( OutputArray _dst ) const
#ifdef HAVE_IPP
static bool ipp_copyTo(const Mat &src, Mat &dst, const Mat &mask)
{
#ifdef HAVE_IPP_IW
#ifdef HAVE_IPP_IW_LL
CV_INSTRUMENT_REGION_IPP();
if(mask.channels() > 1 || mask.depth() != CV_8U)
@ -463,7 +463,7 @@ Mat& Mat::operator = (const Scalar& s)
#ifdef HAVE_IPP
static bool ipp_Mat_setTo_Mat(Mat &dst, Mat &_val, Mat &mask)
{
#ifdef HAVE_IPP_IW
#ifdef HAVE_IPP_IW_LL
CV_INSTRUMENT_REGION_IPP();
if(mask.empty())
@ -1152,7 +1152,7 @@ namespace cv {
static bool ipp_copyMakeBorder( Mat &_src, Mat &_dst, int top, int bottom,
int left, int right, int _borderType, const Scalar& value )
{
#if defined HAVE_IPP_IW && !IPP_DISABLE_PERF_COPYMAKE
#if defined HAVE_IPP_IW_LL && !IPP_DISABLE_PERF_COPYMAKE
CV_INSTRUMENT_REGION_IPP();
::ipp::IwiBorderSize borderSize(left, top, right, bottom);

@ -674,6 +674,11 @@ static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& m
if (cn > 1)
return false;
#endif
#if IPP_VERSION_X100 < 201901
// IPP_DISABLE: 32f C3C functions can read outside of allocated memory
if (cn > 1 && src.depth() == CV_32F)
return false;
#endif
size_t total_size = src.total();
int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;

@ -228,7 +228,7 @@ static MergeFunc getMergeFunc(int depth)
namespace cv {
static bool ipp_merge(const Mat* mv, Mat& dst, int channels)
{
#ifdef HAVE_IPP_IW
#ifdef HAVE_IPP_IW_LL
CV_INSTRUMENT_REGION_IPP();
if(channels != 3 && channels != 4)

@ -8,6 +8,8 @@
#include "opencv2/core/openvx/ovx_defs.hpp"
#include "stat.hpp"
#define IPP_DISABLE_MINMAXIDX_MANY_ROWS 1 // see Core_MinMaxIdx.rows_overflow test
/****************************************************************************************\
* minMaxLoc *
\****************************************************************************************/
@ -624,6 +626,10 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI
if(src.dims <= 2)
{
IppiSize size = ippiSize(src.size());
#if defined(_WIN32) && !defined(_WIN64) && IPP_VERSION_X100 == 201900 && IPP_DISABLE_MINMAXIDX_MANY_ROWS
if (size.height > 65536)
return false; // test: Core_MinMaxIdx.rows_overflow
#endif
size.width *= src.channels();
status = ippMinMaxFun(src.ptr(), (int)src.step, size, dataType, pMinVal, pMaxVal, pMinIdx, pMaxIdx, (Ipp8u*)mask.ptr(), (int)mask.step);

@ -236,7 +236,7 @@ static SplitFunc getSplitFunc(int depth)
namespace cv {
static bool ipp_split(const Mat& src, Mat* mv, int channels)
{
#ifdef HAVE_IPP_IW
#ifdef HAVE_IPP_IW_LL
CV_INSTRUMENT_REGION_IPP();
if(channels != 3 && channels != 4)

@ -2078,7 +2078,12 @@ public:
cv::String env = pIppEnv;
if(env.size())
{
#if IPP_VERSION_X100 >= 201703
#if IPP_VERSION_X100 >= 201900
const Ipp64u minorFeatures = ippCPUID_MOVBE|ippCPUID_AES|ippCPUID_CLMUL|ippCPUID_ABR|ippCPUID_RDRAND|ippCPUID_F16C|
ippCPUID_ADCOX|ippCPUID_RDSEED|ippCPUID_PREFETCHW|ippCPUID_SHA|ippCPUID_MPX|ippCPUID_AVX512CD|ippCPUID_AVX512ER|
ippCPUID_AVX512PF|ippCPUID_AVX512BW|ippCPUID_AVX512DQ|ippCPUID_AVX512VL|ippCPUID_AVX512VBMI|ippCPUID_AVX512_4FMADDPS|
ippCPUID_AVX512_4VNNIW|ippCPUID_AVX512IFMA;
#elif IPP_VERSION_X100 >= 201703
const Ipp64u minorFeatures = ippCPUID_MOVBE|ippCPUID_AES|ippCPUID_CLMUL|ippCPUID_ABR|ippCPUID_RDRAND|ippCPUID_F16C|
ippCPUID_ADCOX|ippCPUID_RDSEED|ippCPUID_PREFETCHW|ippCPUID_SHA|ippCPUID_MPX|ippCPUID_AVX512CD|ippCPUID_AVX512ER|
ippCPUID_AVX512PF|ippCPUID_AVX512BW|ippCPUID_AVX512DQ|ippCPUID_AVX512VL|ippCPUID_AVX512VBMI;
@ -2279,7 +2284,7 @@ void setUseIPP(bool flag)
#endif
}
bool useIPP_NE()
bool useIPP_NotExact()
{
#ifdef HAVE_IPP
CoreTLSData* data = getCoreTlsData().get();
@ -2293,17 +2298,29 @@ bool useIPP_NE()
#endif
}
void setUseIPP_NE(bool flag)
void setUseIPP_NotExact(bool flag)
{
CoreTLSData* data = getCoreTlsData().get();
#ifdef HAVE_IPP
data->useIPP_NE = (getIPPSingleton().useIPP_NE)?flag:false;
data->useIPP_NE = flag;
#else
CV_UNUSED(flag);
data->useIPP_NE = false;
#endif
}
#if OPENCV_ABI_COMPATIBILITY < 400
bool useIPP_NE()
{
return useIPP_NotExact();
}
void setUseIPP_NE(bool flag)
{
setUseIPP_NotExact(flag);
}
#endif
} // namespace ipp
} // namespace cv

@ -2298,4 +2298,30 @@ TEST(UMat_Core_DivideRules, type_32f) { testDivide<float, true>(); }
TEST(Core_DivideRules, type_64f) { testDivide<double, false>(); }
TEST(UMat_Core_DivideRules, type_64f) { testDivide<double, true>(); }
TEST(Core_MinMaxIdx, rows_overflow)
{
const int N = 65536 + 1;
const int M = 1;
{
setRNGSeed(123);
Mat m(N, M, CV_32FC1);
randu(m, -100, 100);
double minVal = 0, maxVal = 0;
int minIdx[CV_MAX_DIM] = { 0 }, maxIdx[CV_MAX_DIM] = { 0 };
cv::minMaxIdx(m, &minVal, &maxVal, minIdx, maxIdx);
double minVal0 = 0, maxVal0 = 0;
int minIdx0[CV_MAX_DIM] = { 0 }, maxIdx0[CV_MAX_DIM] = { 0 };
cv::ipp::setUseIPP(false);
cv::minMaxIdx(m, &minVal0, &maxVal0, minIdx0, maxIdx0);
cv::ipp::setUseIPP(true);
EXPECT_FALSE(fabs(minVal0 - minVal) > 1e-6 || fabs(maxVal0 - maxVal) > 1e-6) << "NxM=" << N << "x" << M <<
" min=" << minVal0 << " vs " << minVal <<
" max=" << maxVal0 << " vs " << maxVal;
}
}
}} // namespace

@ -4585,7 +4585,7 @@ static bool ippFilter2D(int stype, int dtype, int kernel_type,
return false;
#endif
#if IPP_VERSION_X100 < 201801
#if IPP_DISABLE_FILTER2D_BIG_MASK
// Too big difference compared to OpenCV FFT-based convolution
if(kernel_type == CV_32FC1 && (type == ipp16s || type == ipp16u) && (kernel_width > 7 || kernel_height > 7))
return false;

@ -3321,7 +3321,7 @@ static bool ipp_resize(const uchar * src_data, size_t src_step, int src_width, i
return false;
// Resize which doesn't match OpenCV exactly
if (!cv::ipp::useIPP_NE())
if (!cv::ipp::useIPP_NotExact())
{
if (ippInter == ippNearest || ippInter == ippSuper || (ippDataType == ipp8u && ippInter == ippLinear))
return false;

@ -94,7 +94,6 @@ typedef struct CvHidHaarClassifierCascade
sqsumtype *pq0, *pq1, *pq2, *pq3;
sumtype *p0, *p1, *p2, *p3;
void** ipp_stages;
bool is_tree;
bool isStumpBased;
} CvHidHaarClassifierCascade;
@ -128,23 +127,6 @@ icvReleaseHidHaarClassifierCascade( CvHidHaarClassifierCascade** _cascade )
{
if( _cascade && *_cascade )
{
#ifdef HAVE_IPP
CvHidHaarClassifierCascade* cascade = *_cascade;
if( CV_IPP_CHECK_COND && cascade->ipp_stages )
{
int i;
for( i = 0; i < cascade->count; i++ )
{
if( cascade->ipp_stages[i] )
#if IPP_VERSION_X100 < 900 && !IPP_DISABLE_HAAR
ippiHaarClassifierFree_32f( (IppiHaarClassifier_32f*)cascade->ipp_stages[i] );
#else
cvFree(&cascade->ipp_stages[i]);
#endif
}
}
cvFree( &cascade->ipp_stages );
#endif
cvFree( _cascade );
}
}
@ -153,10 +135,6 @@ icvReleaseHidHaarClassifierCascade( CvHidHaarClassifierCascade** _cascade )
static CvHidHaarClassifierCascade*
icvCreateHidHaarClassifierCascade( CvHaarClassifierCascade* cascade )
{
CvRect* ipp_features = 0;
float *ipp_weights = 0, *ipp_thresholds = 0, *ipp_val1 = 0, *ipp_val2 = 0;
int* ipp_counts = 0;
CvHidHaarClassifierCascade* out = 0;
int i, j, k, l;
@ -312,72 +290,9 @@ icvCreateHidHaarClassifierCascade( CvHaarClassifierCascade* cascade )
}
}
#if defined HAVE_IPP && !IPP_DISABLE_HAAR
int can_use_ipp = CV_IPP_CHECK_COND && (!out->has_tilted_features && !out->is_tree && out->isStumpBased);
if( can_use_ipp )
{
int ipp_datasize = cascade->count*sizeof(out->ipp_stages[0]);
float ipp_weight_scale=(float)(1./((orig_window_size.width-icv_object_win_border*2)*
(orig_window_size.height-icv_object_win_border*2)));
out->ipp_stages = (void**)cvAlloc( ipp_datasize );
memset( out->ipp_stages, 0, ipp_datasize );
ipp_features = (CvRect*)cvAlloc( max_count*3*sizeof(ipp_features[0]) );
ipp_weights = (float*)cvAlloc( max_count*3*sizeof(ipp_weights[0]) );
ipp_thresholds = (float*)cvAlloc( max_count*sizeof(ipp_thresholds[0]) );
ipp_val1 = (float*)cvAlloc( max_count*sizeof(ipp_val1[0]) );
ipp_val2 = (float*)cvAlloc( max_count*sizeof(ipp_val2[0]) );
ipp_counts = (int*)cvAlloc( max_count*sizeof(ipp_counts[0]) );
for( i = 0; i < cascade->count; i++ )
{
CvHaarStageClassifier* stage_classifier = cascade->stage_classifier + i;
for( j = 0, k = 0; j < stage_classifier->count; j++ )
{
CvHaarClassifier* classifier = stage_classifier->classifier + j;
int rect_count = 2 + (classifier->haar_feature->rect[2].r.width != 0);
ipp_thresholds[j] = classifier->threshold[0];
ipp_val1[j] = classifier->alpha[0];
ipp_val2[j] = classifier->alpha[1];
ipp_counts[j] = rect_count;
for( l = 0; l < rect_count; l++, k++ )
{
ipp_features[k] = classifier->haar_feature->rect[l].r;
//ipp_features[k].y = orig_window_size.height - ipp_features[k].y - ipp_features[k].height;
ipp_weights[k] = classifier->haar_feature->rect[l].weight*ipp_weight_scale;
}
}
if( ippiHaarClassifierInitAlloc_32f( (IppiHaarClassifier_32f**)&out->ipp_stages[i],
(const IppiRect*)ipp_features, ipp_weights, ipp_thresholds,
ipp_val1, ipp_val2, ipp_counts, stage_classifier->count ) < 0 )
break;
}
if( i < cascade->count )
{
for( j = 0; j < i; j++ )
if( out->ipp_stages[i] )
ippiHaarClassifierFree_32f( (IppiHaarClassifier_32f*)out->ipp_stages[i] );
cvFree( &out->ipp_stages );
}
}
#endif
cascade->hid_cascade = out;
assert( (char*)haar_node_ptr - (char*)out <= datasize );
cvFree( &ipp_features );
cvFree( &ipp_weights );
cvFree( &ipp_thresholds );
cvFree( &ipp_val1 );
cvFree( &ipp_val2 );
cvFree( &ipp_counts );
return out;
}
@ -975,120 +890,54 @@ public:
std::vector<int> rejectLevelsLocal;
std::vector<double> levelWeightsLocal;
#ifdef HAVE_IPP
if(CV_IPP_CHECK_COND && cascade->hid_cascade->ipp_stages )
{
IppiRect iequRect = {equRect.x, equRect.y, equRect.width, equRect.height};
CV_INSTRUMENT_FUN_IPP(ippiRectStdDev_32f_C1R, sum1.ptr<float>(y1), (int)sum1.step,
sqsum1.ptr<double>(y1), (int)sqsum1.step,
norm1->ptr<float>(y1), (int)norm1->step,
ippiSize(ssz.width, ssz.height), iequRect);
int positive = (ssz.width/ystep)*((ssz.height + ystep-1)/ystep);
if( ystep == 1 )
(*mask1) = Scalar::all(1);
else
for( y = y1; y < y2; y++ )
{
uchar* mask1row = mask1->ptr(y);
memset( mask1row, 0, ssz.width );
if( y % ystep == 0 )
for( x = 0; x < ssz.width; x += ystep )
mask1row[x] = (uchar)1;
}
for( int j = 0; j < cascade->count; j++ )
for( y = y1; y < y2; y += ystep )
for( x = 0; x < ssz.width; x += ystep )
{
if (CV_INSTRUMENT_FUN_IPP(ippiApplyHaarClassifier_32f_C1R,
sum1.ptr<float>(y1), (int)sum1.step,
norm1->ptr<float>(y1), (int)norm1->step,
mask1->ptr<uchar>(y1), (int)mask1->step,
ippiSize(ssz.width, ssz.height), &positive,
cascade->hid_cascade->stage_classifier[j].threshold,
(IppiHaarClassifier_32f*)cascade->hid_cascade->ipp_stages[j]) < 0 )
positive = 0;
if( positive <= 0 )
break;
}
CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
if( positive > 0 )
for( y = y1; y < y2; y += ystep )
double gypWeight;
int result = cvRunHaarClassifierCascadeSum( cascade, cvPoint(x,y), gypWeight, 0 );
if( rejectLevels )
{
uchar* mask1row = mask1->ptr(y);
for( x = 0; x < ssz.width; x += ystep )
if( mask1row[x] != 0 )
{
vecLocal.push_back(Rect(cvRound(x*factor), cvRound(y*factor),
winSize.width, winSize.height));
if (vecLocal.size() >= PARALLEL_LOOP_BATCH_SIZE)
{
mtx->lock();
vec->insert(vec->end(), vecLocal.begin(), vecLocal.end());
mtx->unlock();
vecLocal.clear();
}
if( --positive == 0 )
break;
}
if( positive == 0 )
break;
}
}
else
#endif // IPP
for( y = y1; y < y2; y += ystep )
for( x = 0; x < ssz.width; x += ystep )
{
double gypWeight;
int result = cvRunHaarClassifierCascadeSum( cascade, cvPoint(x,y), gypWeight, 0 );
if( rejectLevels )
if( result == 1 )
result = -1*cascade->count;
if( cascade->count + result < 4 )
{
if( result == 1 )
result = -1*cascade->count;
if( cascade->count + result < 4 )
vecLocal.push_back(Rect(cvRound(x*factor), cvRound(y*factor),
winSize.width, winSize.height));
rejectLevelsLocal.push_back(-result);
levelWeightsLocal.push_back(gypWeight);
if (vecLocal.size() >= PARALLEL_LOOP_BATCH_SIZE)
{
vecLocal.push_back(Rect(cvRound(x*factor), cvRound(y*factor),
winSize.width, winSize.height));
rejectLevelsLocal.push_back(-result);
levelWeightsLocal.push_back(gypWeight);
if (vecLocal.size() >= PARALLEL_LOOP_BATCH_SIZE)
{
mtx->lock();
vec->insert(vec->end(), vecLocal.begin(), vecLocal.end());
rejectLevels->insert(rejectLevels->end(), rejectLevelsLocal.begin(), rejectLevelsLocal.end());
levelWeights->insert(levelWeights->end(), levelWeightsLocal.begin(), levelWeightsLocal.end());
mtx->unlock();
vecLocal.clear();
rejectLevelsLocal.clear();
levelWeightsLocal.clear();
}
mtx->lock();
vec->insert(vec->end(), vecLocal.begin(), vecLocal.end());
rejectLevels->insert(rejectLevels->end(), rejectLevelsLocal.begin(), rejectLevelsLocal.end());
levelWeights->insert(levelWeights->end(), levelWeightsLocal.begin(), levelWeightsLocal.end());
mtx->unlock();
vecLocal.clear();
rejectLevelsLocal.clear();
levelWeightsLocal.clear();
}
}
else
}
else
{
if( result > 0 )
{
if( result > 0 )
{
vecLocal.push_back(Rect(cvRound(x*factor), cvRound(y*factor),
winSize.width, winSize.height));
vecLocal.push_back(Rect(cvRound(x*factor), cvRound(y*factor),
winSize.width, winSize.height));
if (vecLocal.size() >= PARALLEL_LOOP_BATCH_SIZE)
{
mtx->lock();
vec->insert(vec->end(), vecLocal.begin(), vecLocal.end());
mtx->unlock();
if (vecLocal.size() >= PARALLEL_LOOP_BATCH_SIZE)
{
mtx->lock();
vec->insert(vec->end(), vecLocal.begin(), vecLocal.end());
mtx->unlock();
vecLocal.clear();
}
vecLocal.clear();
}
}
}
}
if (rejectLevelsLocal.size())
{
@ -1283,12 +1132,6 @@ cvHaarDetectObjectsForROC( const CvArr* _img,
if( flags & CV_HAAR_SCALE_IMAGE )
{
CvSize winSize0 = cascade->orig_window_size;
#ifdef HAVE_IPP
int use_ipp = CV_IPP_CHECK_COND && (cascade->hid_cascade->ipp_stages != 0);
if( use_ipp )
normImg.reset(cvCreateMat( img->rows, img->cols, CV_32FC1));
#endif
imgSmall.reset(cvCreateMat( img->rows + 1, img->cols + 1, CV_8UC1 ));
for( factor = 1; ; factor *= scaleFactor )
@ -1330,15 +1173,7 @@ cvHaarDetectObjectsForROC( const CvArr* _img,
int stripCount = ((sz1.width/ystep)*(sz1.height + ystep-1)/ystep + LOCS_PER_THREAD/2)/LOCS_PER_THREAD;
stripCount = std::min(std::max(stripCount, 1), 100);
#ifdef HAVE_IPP
if( use_ipp )
{
cv::Mat fsum(sum1.rows, sum1.cols, CV_32F, sum1.data.ptr, sum1.step);
cv::cvarrToMat(&sum1).convertTo(fsum, CV_32F, 1, -(1<<24));
}
else
#endif
cvSetImagesForHaarClassifierCascade( cascade, &sum1, &sqsum1, _tilted, 1. );
cvSetImagesForHaarClassifierCascade( cascade, &sum1, &sqsum1, _tilted, 1. );
cv::Mat _norm1 = cv::cvarrToMat(&norm1), _mask1 = cv::cvarrToMat(&mask1);
cv::parallel_for_(cv::Range(0, stripCount),

Loading…
Cancel
Save