diff --git a/3rdparty/carotene/CMakeLists.txt b/3rdparty/carotene/CMakeLists.txt index bfa9368d79..528fcf62e1 100644 --- a/3rdparty/carotene/CMakeLists.txt +++ b/3rdparty/carotene/CMakeLists.txt @@ -20,8 +20,11 @@ if(CMAKE_COMPILER_IS_GNUCC) # - matchTemplate about 5-10% # - goodFeaturesToTrack 10-20% # - cornerHarris 30% for some cases - - set_source_files_properties(${carotene_sources} COMPILE_FLAGS "--param ipcp-unit-growth=100000 --param inline-unit-growth=100000 --param large-stack-frame-growth=5000") + if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "10.0.0") + set_source_files_properties(${carotene_sources} COMPILE_FLAGS "--param ipcp-unit-growth=100000 --param inline-unit-growth=100000 --param large-stack-frame-growth=5000") + else() + set_source_files_properties(${carotene_sources} COMPILE_FLAGS "--param ipa-cp-unit-growth=100000 --param inline-unit-growth=100000 --param large-stack-frame-growth=5000") + endif() endif() add_library(carotene_objs OBJECT diff --git a/3rdparty/carotene/hal/CMakeLists.txt b/3rdparty/carotene/hal/CMakeLists.txt index c4b9acaedd..9c66186891 100644 --- a/3rdparty/carotene/hal/CMakeLists.txt +++ b/3rdparty/carotene/hal/CMakeLists.txt @@ -90,7 +90,11 @@ set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS ${carotene_defs}) # matchTemplate about 5-10% # goodFeaturesToTrack 10-20% # cornerHarris 30% for some cases - set_source_files_properties(impl.cpp $ COMPILE_FLAGS "--param ipcp-unit-growth=100000 --param inline-unit-growth=100000 --param large-stack-frame-growth=5000") + if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "10.0.0") + set_source_files_properties(impl.cpp $ COMPILE_FLAGS "--param ipcp-unit-growth=100000 --param inline-unit-growth=100000 --param large-stack-frame-growth=5000") + else() + set_source_files_properties(impl.cpp $ COMPILE_FLAGS "--param ipa-cp-unit-growth=100000 --param inline-unit-growth=100000 --param large-stack-frame-growth=5000") + endif() # set_source_files_properties(impl.cpp $ COMPILE_FLAGS "--param ipcp-unit-growth=100000 --param inline-unit-growth=100000 --param large-stack-frame-growth=5000") endif() diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index 6e7fc3bfd3..830c85f7f9 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -318,6 +318,21 @@ if(PPC64LE) endif() endif() +# Apply "-Wl,--as-needed" linker flags: https://github.com/opencv/opencv/issues/7001 +if(NOT OPENCV_SKIP_LINK_AS_NEEDED) + if(UNIX AND (NOT APPLE OR NOT CMAKE_VERSION VERSION_LESS "3.2")) + set(_option "-Wl,--as-needed") + set(_saved_CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${_option}") # requires CMake 3.2+ and CMP0056 + ocv_check_compiler_flag(CXX "" HAVE_LINK_AS_NEEDED) + set(CMAKE_EXE_LINKER_FLAGS "${_saved_CMAKE_EXE_LINKER_FLAGS}") + if(HAVE_LINK_AS_NEEDED) + set(OPENCV_EXTRA_EXE_LINKER_FLAGS "${OPENCV_EXTRA_EXE_LINKER_FLAGS} ${_option}") + set(OPENCV_EXTRA_SHARED_LINKER_FLAGS "${OPENCV_EXTRA_SHARED_LINKER_FLAGS} ${_option}") + endif() + endif() +endif() + # combine all "extra" options if(NOT OPENCV_SKIP_EXTRA_COMPILER_FLAGS) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}") diff --git a/cmake/OpenCVFindOpenEXR.cmake b/cmake/OpenCVFindOpenEXR.cmake index fb6c2fa146..9103f1f5a7 100644 --- a/cmake/OpenCVFindOpenEXR.cmake +++ b/cmake/OpenCVFindOpenEXR.cmake @@ -20,6 +20,8 @@ if(WIN32) elseif(MSVC) SET(OPENEXR_LIBSEARCH_SUFFIXES Win32/Release Win32 Win32/Debug) endif() +elseif(UNIX) + SET(OPENEXR_LIBSEARCH_SUFFIXES ${CMAKE_LIBRARY_ARCHITECTURE}) endif() SET(SEARCH_PATHS @@ -39,6 +41,25 @@ MACRO(FIND_OPENEXR_LIBRARY LIBRARY_NAME LIBRARY_SUFFIX) PATHS "${SEARCH_PATH}/lib" "${SEARCH_PATH}/lib/static") ENDMACRO() +MACRO(ocv_find_openexr LIBRARY_SUFFIX) + IF(NOT OPENEXR_FOUND) + FIND_OPENEXR_LIBRARY("Half" "${LIBRARY_SUFFIX}") + FIND_OPENEXR_LIBRARY("Iex" "${LIBRARY_SUFFIX}") + FIND_OPENEXR_LIBRARY("Imath" "${LIBRARY_SUFFIX}") + FIND_OPENEXR_LIBRARY("IlmImf" "${LIBRARY_SUFFIX}") + FIND_OPENEXR_LIBRARY("IlmThread" "${LIBRARY_SUFFIX}") + IF (OPENEXR_INCLUDE_PATH AND OPENEXR_IMATH_LIBRARY AND OPENEXR_ILMIMF_LIBRARY AND OPENEXR_IEX_LIBRARY AND OPENEXR_HALF_LIBRARY AND OPENEXR_ILMTHREAD_LIBRARY) + SET(OPENEXR_FOUND TRUE) + ELSE() + UNSET(OPENEXR_IMATH_LIBRARY) + UNSET(OPENEXR_ILMIMF_LIBRARY) + UNSET(OPENEXR_IEX_LIBRARY) + UNSET(OPENEXR_ILMTHREAD_LIBRARY) + UNSET(OPENEXR_HALF_LIBRARY) + ENDIF() + ENDIF() +ENDMACRO() + FOREACH(SEARCH_PATH ${SEARCH_PATHS}) FIND_PATH(OPENEXR_INCLUDE_PATH ImfRgbaFile.h PATH_SUFFIXES OpenEXR @@ -64,32 +85,14 @@ FOREACH(SEARCH_PATH ${SEARCH_PATHS}) set(OPENEXR_VERSION "${OPENEXR_VERSION_MAJOR}_${OPENEXR_VERSION_MINOR}") ENDIF () - SET(LIBRARY_SUFFIXES - "-${OPENEXR_VERSION}" - "-${OPENEXR_VERSION}_s" - "-${OPENEXR_VERSION}_d" - "-${OPEXEXR_VERSION}_s_d" - "" - "_s" - "_d" - "_s_d") - - FOREACH(LIBRARY_SUFFIX ${LIBRARY_SUFFIXES}) - FIND_OPENEXR_LIBRARY("Half" ${LIBRARY_SUFFIX}) - FIND_OPENEXR_LIBRARY("Iex" ${LIBRARY_SUFFIX}) - FIND_OPENEXR_LIBRARY("Imath" ${LIBRARY_SUFFIX}) - FIND_OPENEXR_LIBRARY("IlmImf" ${LIBRARY_SUFFIX}) - FIND_OPENEXR_LIBRARY("IlmThread" ${LIBRARY_SUFFIX}) - IF (OPENEXR_INCLUDE_PATH AND OPENEXR_IMATH_LIBRARY AND OPENEXR_ILMIMF_LIBRARY AND OPENEXR_IEX_LIBRARY AND OPENEXR_HALF_LIBRARY) - SET(OPENEXR_FOUND TRUE) - BREAK() - ENDIF() - UNSET(OPENEXR_IMATH_LIBRARY) - UNSET(OPENEXR_ILMIMF_LIBRARY) - UNSET(OPENEXR_IEX_LIBRARY) - UNSET(OPENEXR_ILMTHREAD_LIBRARY) - UNSET(OPENEXR_HALF_LIBRARY) - ENDFOREACH() + ocv_find_openexr("-${OPENEXR_VERSION}") + ocv_find_openexr("-${OPENEXR_VERSION}_s") + ocv_find_openexr("-${OPENEXR_VERSION}_d") + ocv_find_openexr("-${OPEXEXR_VERSION}_s_d") + ocv_find_openexr("") + ocv_find_openexr("_s") + ocv_find_openexr("_d") + ocv_find_openexr("_s_d") IF (OPENEXR_FOUND) BREAK() diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index a7ac5e06e8..c7da395f0a 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -540,7 +540,7 @@ macro(ocv_check_flag_support lang flag varname base_options) string(TOUPPER "${flag}" ${varname}) string(REGEX REPLACE "^(/|-)" "HAVE_${_lang}_" ${varname} "${${varname}}") - string(REGEX REPLACE " -|-|=| |\\." "_" ${varname} "${${varname}}") + string(REGEX REPLACE " -|-|=| |\\.|," "_" ${varname} "${${varname}}") ocv_check_compiler_flag("${_lang}" "${base_options} ${flag}" ${${varname}} ${ARGN}) endmacro() diff --git a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp index d9719b7fa0..c3e89b98c1 100644 --- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp @@ -213,7 +213,7 @@ Regular integers: |min, max | x | x | x | x | x | x | |absdiff | x | x | x | x | x | x | |absdiffs | | x | | x | | | -|reduce | | | | | x | x | +|reduce | x | x | x | x | x | x | |mask | x | x | x | x | x | x | |pack | x | x | x | x | x | x | |pack_u | x | | x | | | | @@ -670,7 +670,7 @@ Scheme: @code {A1 A2 A3 ...} => min(A1,A2,A3,...) @endcode -For 32-bit integer and 32-bit floating point types. */ +For all types except 64-bit integer and 64-bit floating point types. */ OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min) /** @brief Find one max value @@ -679,7 +679,7 @@ Scheme: @code {A1 A2 A3 ...} => max(A1,A2,A3,...) @endcode -For 32-bit integer and 32-bit floating point types. */ +For all types except 64-bit integer and 64-bit floating point types. */ OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max) static const unsigned char popCountTable[] = @@ -1219,7 +1219,7 @@ Scheme: @code {A1 A2 A3 ...} => sum{A1,A2,A3,...} @endcode -For 32-bit integer and 32-bit floating point types.*/ +*/ template inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a) { typename V_TypeTraits<_Tp>::sum_type c = a.s[0]; diff --git a/modules/core/include/opencv2/core/hal/intrin_msa.hpp b/modules/core/include/opencv2/core/hal/intrin_msa.hpp index 76e6bed200..260350c6aa 100755 --- a/modules/core/include/opencv2/core/hal/intrin_msa.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_msa.hpp @@ -1000,6 +1000,22 @@ OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_int32x4, int, min, std::min) OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_float32x4, float, max, std::max) OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_float32x4, float, min, std::min) + +#define OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(_Tpvec, scalartype, _Tpvec2, func) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + _Tpvec2 a1, a2; \ + v_expand(a, a1, a2); \ + return (scalartype)v_reduce_##func(v_##func(a1, a2)); \ +} + +OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(v_uint8x16, uchar, v_uint16x8, min) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(v_uint8x16, uchar, v_uint16x8, max) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(v_int8x16, char, v_int16x8, min) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(v_int8x16, char, v_int16x8, max) + + + #define OPENCV_HAL_IMPL_MSA_REDUCE_SUM(_Tpvec, scalartype, suffix) \ inline scalartype v_reduce_sum(const _Tpvec& a) \ { \ diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp index 7411b28d0c..280691b448 100644 --- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -1241,6 +1241,20 @@ inline int v_reduce_sum(const v_int16x8& a) return vget_lane_s32(vpadd_s32(t1, t1), 0); } +#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \ + a0 = vp##vectorfunc##_##suffix(a0, a0); \ + a0 = vp##vectorfunc##_##suffix(a0, a0); \ + return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \ +} + +OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_uint8x16, uint8x8, uchar, max, max, u8) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_uint8x16, uint8x8, uchar, min, min, u8) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_int8x16, int8x8, schar, max, max, s8) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_int8x16, int8x8, schar, min, min, s8) + #define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \ inline scalartype v_reduce_##func(const _Tpvec& a) \ { \ @@ -1249,10 +1263,10 @@ inline scalartype v_reduce_##func(const _Tpvec& a) \ return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \ } -OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned int, max, max, u16) -OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned int, min, min, u16) -OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, int, max, max, s16) -OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, int, min, min, s16) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, ushort, max, max, u16) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, ushort, min, min, u16) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, max, max, s16) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, min, min, s16) #define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \ inline scalartype v_reduce_##func(const _Tpvec& a) \ @@ -2129,10 +2143,12 @@ inline v_float32x4 v_lut(const float* tab, const int* idx) } inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) { + typedef uint64 CV_DECL_ALIGNED(1) unaligned_uint64; + uint64 CV_DECL_ALIGNED(32) elems[2] = { - *(uint64*)(tab + idx[0]), - *(uint64*)(tab + idx[1]) + *(unaligned_uint64*)(tab + idx[0]), + *(unaligned_uint64*)(tab + idx[1]) }; return v_float32x4(vreinterpretq_f32_u64(vld1q_u64(elems))); } diff --git a/modules/core/src/minmax.cpp b/modules/core/src/minmax.cpp index da75e20a1d..b4e5e4632a 100644 --- a/modules/core/src/minmax.cpp +++ b/modules/core/src/minmax.cpp @@ -71,33 +71,759 @@ minMaxIdx_( const T* src, const uchar* mask, WT* _minVal, WT* _maxVal, *_maxVal = maxVal; } +#if CV_SIMD128 +template CV_ALWAYS_INLINE void +minMaxIdx_init( const T* src, const uchar* mask, WT* minval, WT* maxval, + size_t* minidx, size_t* maxidx, WT &minVal, WT &maxVal, + size_t &minIdx, size_t &maxIdx, const WT minInit, const WT maxInit, + const int nlanes, int len, size_t startidx, int &j, int &len0 ) +{ + len0 = len & -nlanes; + j = 0; + + minVal = *minval, maxVal = *maxval; + minIdx = *minidx, maxIdx = *maxidx; + + // To handle start values out of range + if ( minVal < minInit || maxVal < minInit || minVal > maxInit || maxVal > maxInit ) + { + uchar done = 0x00; + + for ( ; (j < len) && (done != 0x03); j++ ) + { + if ( !mask || mask[j] ) { + T val = src[j]; + if ( val < minVal ) + { + minVal = val; + minIdx = startidx + j; + done |= 0x01; + } + if ( val > maxVal ) + { + maxVal = val; + maxIdx = startidx + j; + done |= 0x02; + } + } + } + + len0 = j + ((len - j) & -nlanes); + } +} + +#if CV_SIMD128_64F +CV_ALWAYS_INLINE double v_reduce_min(const v_float64x2& a) +{ + double CV_DECL_ALIGNED(32) idx[2]; + v_store_aligned(idx, a); + return std::min(idx[0], idx[1]); +} + +CV_ALWAYS_INLINE double v_reduce_max(const v_float64x2& a) +{ + double CV_DECL_ALIGNED(32) idx[2]; + v_store_aligned(idx, a); + return std::max(idx[0], idx[1]); +} + +CV_ALWAYS_INLINE uint64_t v_reduce_min(const v_uint64x2& a) +{ + uint64_t CV_DECL_ALIGNED(32) idx[2]; + v_store_aligned(idx, a); + return std::min(idx[0], idx[1]); +} + +CV_ALWAYS_INLINE v_uint64x2 v_select(const v_uint64x2& mask, const v_uint64x2& a, const v_uint64x2& b) +{ + return b ^ ((a ^ b) & mask); +} +#endif + +#define MINMAXIDX_REDUCE(suffix, suffix2, maxLimit, IR) \ +template CV_ALWAYS_INLINE void \ +minMaxIdx_reduce_##suffix( VT &valMin, VT &valMax, IT &idxMin, IT &idxMax, IT &none, \ + T &minVal, T &maxVal, size_t &minIdx, size_t &maxIdx, \ + size_t delta ) \ +{ \ + if ( v_check_any(idxMin != none) ) \ + { \ + minVal = v_reduce_min(valMin); \ + minIdx = (size_t)v_reduce_min(v_select(v_reinterpret_as_##suffix2(v_setall_##suffix((IR)minVal) == valMin), \ + idxMin, v_setall_##suffix2(maxLimit))) + delta; \ + } \ + if ( v_check_any(idxMax != none) ) \ + { \ + maxVal = v_reduce_max(valMax); \ + maxIdx = (size_t)v_reduce_min(v_select(v_reinterpret_as_##suffix2(v_setall_##suffix((IR)maxVal) == valMax), \ + idxMax, v_setall_##suffix2(maxLimit))) + delta; \ + } \ +} + +MINMAXIDX_REDUCE(u8, u8, UCHAR_MAX, uchar) +MINMAXIDX_REDUCE(s8, u8, UCHAR_MAX, uchar) +MINMAXIDX_REDUCE(u16, u16, USHRT_MAX, ushort) +MINMAXIDX_REDUCE(s16, u16, USHRT_MAX, ushort) +MINMAXIDX_REDUCE(s32, u32, UINT_MAX, uint) +MINMAXIDX_REDUCE(f32, u32, (1 << 23) - 1, float) +#if CV_SIMD128_64F +MINMAXIDX_REDUCE(f64, u64, UINT_MAX, double) +#endif + +template CV_ALWAYS_INLINE void +minMaxIdx_finish( const T* src, const uchar* mask, WT* minval, WT* maxval, + size_t* minidx, size_t* maxidx, WT minVal, WT maxVal, + size_t minIdx, size_t maxIdx, int len, size_t startidx, + int j ) +{ + for ( ; j < len ; j++ ) + { + if ( !mask || mask[j] ) + { + T val = src[j]; + if ( val < minVal ) + { + minVal = val; + minIdx = startidx + j; + } + if ( val > maxVal ) + { + maxVal = val; + maxIdx = startidx + j; + } + } + } + + *minidx = minIdx; + *maxidx = maxIdx; + *minval = minVal; + *maxval = maxVal; +} +#endif + static void minMaxIdx_8u(const uchar* src, const uchar* mask, int* minval, int* maxval, size_t* minidx, size_t* maxidx, int len, size_t startidx ) -{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); } +{ +#if CV_SIMD128 + if ( len >= v_uint8x16::nlanes ) + { + int j, len0; + int minVal, maxVal; + size_t minIdx, maxIdx; + + minMaxIdx_init( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, minIdx, maxIdx, + (int)0, (int)UCHAR_MAX, v_uint8x16::nlanes, len, startidx, j, len0 ); + + if ( j <= len0 - v_uint8x16::nlanes ) + { + v_uint8x16 inc = v_setall_u8(v_uint8x16::nlanes); + v_uint8x16 none = v_reinterpret_as_u8(v_setall_s8(-1)); + v_uint8x16 idxStart(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + do + { + v_uint8x16 valMin = v_setall_u8((uchar)minVal), valMax = v_setall_u8((uchar)maxVal); + v_uint8x16 idx = idxStart, idxMin = none, idxMax = none; + + int k = j; + size_t delta = startidx + j; + + if ( !mask ) + { + for( ; k < std::min(len0, j + 15 * v_uint8x16::nlanes); k += v_uint8x16::nlanes ) + { + v_uint8x16 data = v_load(src + k); + v_uint8x16 cmpMin = (data < valMin); + v_uint8x16 cmpMax = (data > valMax); + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_min(data, valMin); + valMax = v_max(data, valMax); + idx += inc; + } + } + else + { + for( ; k < std::min(len0, j + 15 * v_uint8x16::nlanes); k += v_uint8x16::nlanes ) + { + v_uint8x16 data = v_load(src + k); + v_uint8x16 maskVal = v_load(mask + k) != v_setzero_u8(); + v_uint8x16 cmpMin = (data < valMin) & maskVal; + v_uint8x16 cmpMax = (data > valMax) & maskVal; + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_select(cmpMin, data, valMin); + valMax = v_select(cmpMax, data, valMax); + idx += inc; + } + } + + j = k; + + minMaxIdx_reduce_u8( valMin, valMax, idxMin, idxMax, none, minVal, maxVal, + minIdx, maxIdx, delta ); + } + while ( j < len0 ); + } + + minMaxIdx_finish( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, + minIdx, maxIdx, len, startidx, j ); + } + else + { + minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx); + } +#else + minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx); +#endif +} static void minMaxIdx_8s(const schar* src, const uchar* mask, int* minval, int* maxval, size_t* minidx, size_t* maxidx, int len, size_t startidx ) -{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); } +{ +#if CV_SIMD128 + if ( len >= v_int8x16::nlanes ) + { + int j, len0; + int minVal, maxVal; + size_t minIdx, maxIdx; + + minMaxIdx_init( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, minIdx, maxIdx, + (int)SCHAR_MIN, (int)SCHAR_MAX, v_int8x16::nlanes, len, startidx, j, len0 ); + + if ( j <= len0 - v_int8x16::nlanes ) + { + v_uint8x16 inc = v_setall_u8(v_int8x16::nlanes); + v_uint8x16 none = v_reinterpret_as_u8(v_setall_s8(-1)); + v_uint8x16 idxStart(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + do + { + v_int8x16 valMin = v_setall_s8((schar)minVal), valMax = v_setall_s8((schar)maxVal); + v_uint8x16 idx = idxStart, idxMin = none, idxMax = none; + + int k = j; + size_t delta = startidx + j; + + if ( !mask ) + { + for( ; k < std::min(len0, j + 15 * v_int8x16::nlanes); k += v_int8x16::nlanes ) + { + v_int8x16 data = v_load(src + k); + v_uint8x16 cmpMin = v_reinterpret_as_u8(data < valMin); + v_uint8x16 cmpMax = v_reinterpret_as_u8(data > valMax); + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_min(data, valMin); + valMax = v_max(data, valMax); + idx += inc; + } + } + else + { + for( ; k < std::min(len0, j + 15 * v_int8x16::nlanes); k += v_int8x16::nlanes ) + { + v_int8x16 data = v_load(src + k); + v_uint8x16 maskVal = v_load(mask + k) != v_setzero_u8(); + v_uint8x16 cmpMin = v_reinterpret_as_u8(data < valMin) & maskVal; + v_uint8x16 cmpMax = v_reinterpret_as_u8(data > valMax) & maskVal; + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_select(v_reinterpret_as_s8(cmpMin), data, valMin); + valMax = v_select(v_reinterpret_as_s8(cmpMax), data, valMax); + idx += inc; + } + } + + j = k; + + minMaxIdx_reduce_s8( valMin, valMax, idxMin, idxMax, none, minVal, maxVal, + minIdx, maxIdx, delta ); + } + while ( j < len0 ); + } + + minMaxIdx_finish( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, + minIdx, maxIdx, len, startidx, j ); + } + else + { + minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx); + } +#else + minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); +#endif +} static void minMaxIdx_16u(const ushort* src, const uchar* mask, int* minval, int* maxval, size_t* minidx, size_t* maxidx, int len, size_t startidx ) -{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); } +{ +#if CV_SIMD128 + if ( len >= v_uint16x8::nlanes ) + { + int j, len0; + int minVal, maxVal; + size_t minIdx, maxIdx; + + minMaxIdx_init( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, minIdx, maxIdx, + (int)0, (int)USHRT_MAX, v_uint16x8::nlanes, len, startidx, j, len0 ); + + if ( j <= len0 - v_uint16x8::nlanes ) + { + v_uint16x8 inc = v_setall_u16(v_uint16x8::nlanes); + v_uint16x8 none = v_reinterpret_as_u16(v_setall_s16(-1)); + v_uint16x8 idxStart(0, 1, 2, 3, 4, 5, 6, 7); + + do + { + v_uint16x8 valMin = v_setall_u16((ushort)minVal), valMax = v_setall_u16((ushort)maxVal); + v_uint16x8 idx = idxStart, idxMin = none, idxMax = none; + + int k = j; + size_t delta = startidx + j; + + if ( !mask ) + { + for( ; k < std::min(len0, j + 8191 * v_uint16x8::nlanes); k += v_uint16x8::nlanes ) + { + v_uint16x8 data = v_load(src + k); + v_uint16x8 cmpMin = (data < valMin); + v_uint16x8 cmpMax = (data > valMax); + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_min(data, valMin); + valMax = v_max(data, valMax); + idx += inc; + } + } + else + { + for( ; k < std::min(len0, j + 8191 * v_uint16x8::nlanes); k += v_uint16x8::nlanes ) + { + v_uint16x8 data = v_load(src + k); + v_uint16x8 maskVal = v_load_expand(mask + k) != v_setzero_u16(); + v_uint16x8 cmpMin = (data < valMin) & maskVal; + v_uint16x8 cmpMax = (data > valMax) & maskVal; + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_select(cmpMin, data, valMin); + valMax = v_select(cmpMax, data, valMax); + idx += inc; + } + } + + j = k; + + minMaxIdx_reduce_u16( valMin, valMax, idxMin, idxMax, none, minVal, maxVal, + minIdx, maxIdx, delta ); + } + while ( j < len0 ); + } + + minMaxIdx_finish( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, + minIdx, maxIdx, len, startidx, j ); + } + else + { + minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx); + } +#else + minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); +#endif +} static void minMaxIdx_16s(const short* src, const uchar* mask, int* minval, int* maxval, size_t* minidx, size_t* maxidx, int len, size_t startidx ) -{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); } +{ +#if CV_SIMD128 + if ( len >= v_int16x8::nlanes ) + { + int j, len0; + int minVal, maxVal; + size_t minIdx, maxIdx; + + minMaxIdx_init( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, minIdx, maxIdx, + (int)SHRT_MIN, (int)SHRT_MAX, v_int16x8::nlanes, len, startidx, j, len0 ); + + if ( j <= len0 - v_int16x8::nlanes ) + { + v_uint16x8 inc = v_setall_u16(v_int16x8::nlanes); + v_uint16x8 none = v_reinterpret_as_u16(v_setall_s16(-1)); + v_uint16x8 idxStart(0, 1, 2, 3, 4, 5, 6, 7); + + do + { + v_int16x8 valMin = v_setall_s16((short)minVal), valMax = v_setall_s16((short)maxVal); + v_uint16x8 idx = idxStart, idxMin = none, idxMax = none; + + int k = j; + size_t delta = startidx + j; + + if ( !mask ) + { + for( ; k < std::min(len0, j + 8191 * v_int16x8::nlanes); k += v_int16x8::nlanes ) + { + v_int16x8 data = v_load(src + k); + v_uint16x8 cmpMin = v_reinterpret_as_u16(data < valMin); + v_uint16x8 cmpMax = v_reinterpret_as_u16(data > valMax); + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_min(data, valMin); + valMax = v_max(data, valMax); + idx += inc; + } + } + else + { + for( ; k < std::min(len0, j + 8191 * v_int16x8::nlanes); k += v_int16x8::nlanes ) + { + v_int16x8 data = v_load(src + k); + v_uint16x8 maskVal = v_load_expand(mask + k) != v_setzero_u16(); + v_uint16x8 cmpMin = v_reinterpret_as_u16(data < valMin) & maskVal; + v_uint16x8 cmpMax = v_reinterpret_as_u16(data > valMax) & maskVal; + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_select(v_reinterpret_as_s16(cmpMin), data, valMin); + valMax = v_select(v_reinterpret_as_s16(cmpMax), data, valMax); + idx += inc; + } + } + + j = k; + + minMaxIdx_reduce_s16( valMin, valMax, idxMin, idxMax, none, minVal, maxVal, + minIdx, maxIdx, delta ); + } + while ( j < len0 ); + } + + minMaxIdx_finish( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, + minIdx, maxIdx, len, startidx, j ); + } + else + { + minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx); + } +#else + minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); +#endif +} static void minMaxIdx_32s(const int* src, const uchar* mask, int* minval, int* maxval, size_t* minidx, size_t* maxidx, int len, size_t startidx ) -{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); } +{ +#if CV_SIMD128 + if ( len >= 2 * v_int32x4::nlanes ) + { + int j = 0, len0 = len & -(2 * v_int32x4::nlanes); + int minVal = *minval, maxVal = *maxval; + size_t minIdx = *minidx, maxIdx = *maxidx; + + { + v_uint32x4 inc = v_setall_u32(v_int32x4::nlanes); + v_uint32x4 none = v_reinterpret_as_u32(v_setall_s32(-1)); + v_uint32x4 idxStart(0, 1, 2, 3); + + do + { + v_int32x4 valMin = v_setall_s32(minVal), valMax = v_setall_s32(maxVal); + v_uint32x4 idx = idxStart, idxMin = none, idxMax = none; + + int k = j; + size_t delta = startidx + j; + + if ( !mask ) + { + for( ; k < std::min(len0, j + 32766 * 2 * v_int32x4::nlanes); k += 2 * v_int32x4::nlanes ) + { + v_int32x4 data = v_load(src + k); + v_uint32x4 cmpMin = v_reinterpret_as_u32(data < valMin); + v_uint32x4 cmpMax = v_reinterpret_as_u32(data > valMax); + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_min(data, valMin); + valMax = v_max(data, valMax); + idx += inc; + data = v_load(src + k + v_int32x4::nlanes); + cmpMin = v_reinterpret_as_u32(data < valMin); + cmpMax = v_reinterpret_as_u32(data > valMax); + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_min(data, valMin); + valMax = v_max(data, valMax); + idx += inc; + } + } + else + { + for( ; k < std::min(len0, j + 32766 * 2 * v_int32x4::nlanes); k += 2 * v_int32x4::nlanes ) + { + v_int32x4 data = v_load(src + k); + v_uint16x8 maskVal = v_load_expand(mask + k) != v_setzero_u16(); + v_int32x4 maskVal1, maskVal2; + v_expand(v_reinterpret_as_s16(maskVal), maskVal1, maskVal2); + v_uint32x4 cmpMin = v_reinterpret_as_u32((data < valMin) & maskVal1); + v_uint32x4 cmpMax = v_reinterpret_as_u32((data > valMax) & maskVal1); + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_select(v_reinterpret_as_s32(cmpMin), data, valMin); + valMax = v_select(v_reinterpret_as_s32(cmpMax), data, valMax); + idx += inc; + data = v_load(src + k + v_int32x4::nlanes); + cmpMin = v_reinterpret_as_u32((data < valMin) & maskVal2); + cmpMax = v_reinterpret_as_u32((data > valMax) & maskVal2); + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_select(v_reinterpret_as_s32(cmpMin), data, valMin); + valMax = v_select(v_reinterpret_as_s32(cmpMax), data, valMax); + idx += inc; + } + } + + j = k; + + minMaxIdx_reduce_s32( valMin, valMax, idxMin, idxMax, none, minVal, maxVal, + minIdx, maxIdx, delta ); + } + while ( j < len0 ); + } + + minMaxIdx_finish( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, + minIdx, maxIdx, len, startidx, j ); + } + else + { + minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx); + } +#else + minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); +#endif +} static void minMaxIdx_32f(const float* src, const uchar* mask, float* minval, float* maxval, size_t* minidx, size_t* maxidx, int len, size_t startidx ) -{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); } +{ +#if CV_SIMD128 + if ( len >= 2 * v_float32x4::nlanes ) + { + int j, len0; + float minVal, maxVal; + size_t minIdx, maxIdx; + + minMaxIdx_init( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, minIdx, maxIdx, + FLT_MIN, FLT_MAX, 2 * v_float32x4::nlanes, len, startidx, j, len0 ); + + if ( j <= len0 - 2 * v_float32x4::nlanes ) + { + v_uint32x4 inc = v_setall_u32(v_float32x4::nlanes); + v_uint32x4 none = v_reinterpret_as_u32(v_setall_s32(-1)); + v_uint32x4 idxStart(0, 1, 2, 3); + + do + { + v_float32x4 valMin = v_setall_f32(minVal), valMax = v_setall_f32(maxVal); + v_uint32x4 idx = idxStart, idxMin = none, idxMax = none; + + int k = j; + size_t delta = startidx + j; + + if ( !mask ) + { + for( ; k < std::min(len0, j + 32766 * 2 * v_float32x4::nlanes); k += 2 * v_float32x4::nlanes ) + { + v_float32x4 data = v_load(src + k); + v_uint32x4 cmpMin = v_reinterpret_as_u32(data < valMin); + v_uint32x4 cmpMax = v_reinterpret_as_u32(data > valMax); + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_min(data, valMin); + valMax = v_max(data, valMax); + idx += inc; + data = v_load(src + k + v_float32x4::nlanes); + cmpMin = v_reinterpret_as_u32(data < valMin); + cmpMax = v_reinterpret_as_u32(data > valMax); + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_min(data, valMin); + valMax = v_max(data, valMax); + idx += inc; + } + } + else + { + for( ; k < std::min(len0, j + 32766 * 2 * v_float32x4::nlanes); k += 2 * v_float32x4::nlanes ) + { + v_float32x4 data = v_load(src + k); + v_uint16x8 maskVal = v_load_expand(mask + k) != v_setzero_u16(); + v_int32x4 maskVal1, maskVal2; + v_expand(v_reinterpret_as_s16(maskVal), maskVal1, maskVal2); + v_uint32x4 cmpMin = v_reinterpret_as_u32(v_reinterpret_as_s32(data < valMin) & maskVal1); + v_uint32x4 cmpMax = v_reinterpret_as_u32(v_reinterpret_as_s32(data > valMax) & maskVal1); + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_select(v_reinterpret_as_f32(cmpMin), data, valMin); + valMax = v_select(v_reinterpret_as_f32(cmpMax), data, valMax); + idx += inc; + data = v_load(src + k + v_float32x4::nlanes); + cmpMin = v_reinterpret_as_u32(v_reinterpret_as_s32(data < valMin) & maskVal2); + cmpMax = v_reinterpret_as_u32(v_reinterpret_as_s32(data > valMax) & maskVal2); + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_select(v_reinterpret_as_f32(cmpMin), data, valMin); + valMax = v_select(v_reinterpret_as_f32(cmpMax), data, valMax); + idx += inc; + } + } + + j = k; + + minMaxIdx_reduce_f32( valMin, valMax, idxMin, idxMax, none, minVal, maxVal, + minIdx, maxIdx, delta ); + } + while ( j < len0 ); + } + + minMaxIdx_finish( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, + minIdx, maxIdx, len, startidx, j ); + } + else + { + minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx); + } +#else + minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); +#endif +} static void minMaxIdx_64f(const double* src, const uchar* mask, double* minval, double* maxval, size_t* minidx, size_t* maxidx, int len, size_t startidx ) -{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); } +{ +#if CV_SIMD128_64F + if ( len >= 4 * v_float64x2::nlanes ) + { + int j, len0; + double minVal, maxVal; + size_t minIdx, maxIdx; + + minMaxIdx_init( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, minIdx, maxIdx, + DBL_MIN, DBL_MAX, 4 * v_float64x2::nlanes, len, startidx, j, len0 ); + + if ( j <= len0 - 4 * v_float64x2::nlanes ) + { + v_uint64x2 inc = v_setall_u64(v_float64x2::nlanes); + v_uint64x2 none = v_reinterpret_as_u64(v_setall_s64(-1)); + v_uint64x2 idxStart(0, 1); + + do + { + v_float64x2 valMin = v_setall_f64(minVal), valMax = v_setall_f64(maxVal); + v_uint64x2 idx = idxStart, idxMin = none, idxMax = none; + + int k = j; + size_t delta = startidx + j; + + if ( !mask ) + { + for( ; k < std::min(len0, j + 32764 * 4 * v_float64x2::nlanes); k += 4 * v_float64x2::nlanes ) + { + v_float64x2 data = v_load(src + k); + v_uint64x2 cmpMin = v_reinterpret_as_u64(data < valMin); + v_uint64x2 cmpMax = v_reinterpret_as_u64(data > valMax); + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_min(data, valMin); + valMax = v_max(data, valMax); + idx += inc; + data = v_load(src + k + v_float64x2::nlanes); + cmpMin = v_reinterpret_as_u64(data < valMin); + cmpMax = v_reinterpret_as_u64(data > valMax); + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_min(data, valMin); + valMax = v_max(data, valMax); + idx += inc; + data = v_load(src + k + 2 * v_float64x2::nlanes); + cmpMin = v_reinterpret_as_u64(data < valMin); + cmpMax = v_reinterpret_as_u64(data > valMax); + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_min(data, valMin); + valMax = v_max(data, valMax); + idx += inc; + data = v_load(src + k + 3 * v_float64x2::nlanes); + cmpMin = v_reinterpret_as_u64(data < valMin); + cmpMax = v_reinterpret_as_u64(data > valMax); + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_min(data, valMin); + valMax = v_max(data, valMax); + idx += inc; + } + } + else + { + for( ; k < std::min(len0, j + 32764 * 4 * v_float64x2::nlanes); k += 4 * v_float64x2::nlanes ) + { + v_float64x2 data = v_load(src + k); + v_uint16x8 maskVal = v_load_expand(mask + k) != v_setzero_u16(); + v_int32x4 maskVal1, maskVal2; + v_expand(v_reinterpret_as_s16(maskVal), maskVal1, maskVal2); + v_int64x2 maskVal3, maskVal4; + v_expand(maskVal1, maskVal3, maskVal4); + v_uint64x2 cmpMin = v_reinterpret_as_u64(v_reinterpret_as_s64(data < valMin) & maskVal3); + v_uint64x2 cmpMax = v_reinterpret_as_u64(v_reinterpret_as_s64(data > valMax) & maskVal3); + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_select(v_reinterpret_as_f64(cmpMin), data, valMin); + valMax = v_select(v_reinterpret_as_f64(cmpMax), data, valMax); + idx += inc; + data = v_load(src + k + v_float64x2::nlanes); + cmpMin = v_reinterpret_as_u64(v_reinterpret_as_s64(data < valMin) & maskVal4); + cmpMax = v_reinterpret_as_u64(v_reinterpret_as_s64(data > valMax) & maskVal4); + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_select(v_reinterpret_as_f64(cmpMin), data, valMin); + valMax = v_select(v_reinterpret_as_f64(cmpMax), data, valMax); + idx += inc; + data = v_load(src + k + 2 * v_float64x2::nlanes); + v_expand(maskVal2, maskVal3, maskVal4); + cmpMin = v_reinterpret_as_u64(v_reinterpret_as_s64(data < valMin) & maskVal3); + cmpMax = v_reinterpret_as_u64(v_reinterpret_as_s64(data > valMax) & maskVal3); + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_select(v_reinterpret_as_f64(cmpMin), data, valMin); + valMax = v_select(v_reinterpret_as_f64(cmpMax), data, valMax); + idx += inc; + data = v_load(src + k + 3 * v_float64x2::nlanes); + cmpMin = v_reinterpret_as_u64(v_reinterpret_as_s64(data < valMin) & maskVal4); + cmpMax = v_reinterpret_as_u64(v_reinterpret_as_s64(data > valMax) & maskVal4); + idxMin = v_select(cmpMin, idx, idxMin); + idxMax = v_select(cmpMax, idx, idxMax); + valMin = v_select(v_reinterpret_as_f64(cmpMin), data, valMin); + valMax = v_select(v_reinterpret_as_f64(cmpMax), data, valMax); + idx += inc; + } + } + + j = k; + + minMaxIdx_reduce_f64( valMin, valMax, idxMin, idxMax, none, minVal, maxVal, + minIdx, maxIdx, delta ); + } + while ( j < len0 ); + } + + minMaxIdx_finish( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, + minIdx, maxIdx, len, startidx, j ); + } + else + { + minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx); + } +#else + minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); +#endif +} typedef void (*MinMaxIdxFunc)(const uchar*, const uchar*, int*, int*, size_t*, size_t*, int, size_t); diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index d8d94fdb0d..6731091463 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -894,13 +894,18 @@ template struct TheTest TheTest & test_reduce() { Data dataA; + int sum = 0; + for (int i = 0; i < R::nlanes; ++i) + { + sum += (int)(dataA[i]); // To prevent a constant overflow with int8 + } R a = dataA; - EXPECT_EQ((LaneType)1, v_reduce_min(a)); - EXPECT_EQ((LaneType)R::nlanes, v_reduce_max(a)); - EXPECT_EQ((LaneType)((1 + R::nlanes)*R::nlanes/2), v_reduce_sum(a)); + EXPECT_EQ((LaneType)1, (LaneType)v_reduce_min(a)); + EXPECT_EQ((LaneType)(R::nlanes), (LaneType)v_reduce_max(a)); + EXPECT_EQ((int)(sum), (int)v_reduce_sum(a)); dataA[0] += R::nlanes; R an = dataA; - EXPECT_EQ((LaneType)2, v_reduce_min(an)); + EXPECT_EQ((LaneType)2, (LaneType)v_reduce_min(an)); return *this; } @@ -1588,6 +1593,7 @@ void test_hal_intrin_uint8() .test_dotprod_expand() .test_min_max() .test_absdiff() + .test_reduce() .test_reduce_sad() .test_mask() .test_popcount() @@ -1629,6 +1635,7 @@ void test_hal_intrin_int8() .test_absdiff() .test_absdiffs() .test_abs() + .test_reduce() .test_reduce_sad() .test_mask() .test_popcount() diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index d25096fbb7..4699dac9ca 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -86,7 +86,7 @@ CV__DNN_INLINE_NS_BEGIN */ enum Target { - DNN_TARGET_CPU, + DNN_TARGET_CPU = 0, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16, DNN_TARGET_MYRIAD, @@ -97,7 +97,7 @@ CV__DNN_INLINE_NS_BEGIN }; CV_EXPORTS std::vector< std::pair > getAvailableBackends(); - CV_EXPORTS std::vector getAvailableTargets(Backend be); + CV_EXPORTS_W std::vector getAvailableTargets(dnn::Backend be); /** @brief This class provides all data needed to initialize layer. * diff --git a/modules/dnn/misc/java/gen_dict.json b/modules/dnn/misc/java/gen_dict.json index c0e0a1b6ea..5a397eac51 100644 --- a/modules/dnn/misc/java/gen_dict.json +++ b/modules/dnn/misc/java/gen_dict.json @@ -36,6 +36,14 @@ "v_type": "vector_Layer", "j_import": "org.opencv.dnn.Layer" }, + "vector_Target": { + "j_type": "List", + "jn_type": "List", + "jni_type": "jobject", + "jni_var": "std::vector< cv::dnn::Target > %(n)s", + "suffix": "Ljava_util_List", + "v_type": "vector_Target" + }, "LayerId": { "j_type": "DictValue", "jn_type": "long", diff --git a/modules/dnn/misc/java/src/cpp/dnn_converters.cpp b/modules/dnn/misc/java/src/cpp/dnn_converters.cpp index 1d259ae327..95184c0e90 100644 --- a/modules/dnn/misc/java/src/cpp/dnn_converters.cpp +++ b/modules/dnn/misc/java/src/cpp/dnn_converters.cpp @@ -60,6 +60,25 @@ jobject vector_Ptr_Layer_to_List(JNIEnv* env, std::vector& vs) +{ + static jclass juArrayList = ARRAYLIST(env); + static jmethodID m_create = CONSTRUCTOR(env, juArrayList); + jmethodID m_add = LIST_ADD(env, juArrayList); + + static jclass jInteger = env->FindClass("java/lang/Integer"); + static jmethodID m_create_Integer = env->GetMethodID(jInteger, "", "(I)V"); + + jobject result = env->NewObject(juArrayList, m_create, vs.size()); + for (size_t i = 0; i < vs.size(); ++i) + { + jobject element = env->NewObject(jInteger, m_create_Integer, vs[i]); + env->CallBooleanMethod(result, m_add, element); + env->DeleteLocalRef(element); + } + return result; +} + std::vector > List_to_vector_Ptr_Layer(JNIEnv* env, jobject list) { static jclass juArrayList = ARRAYLIST(env); diff --git a/modules/dnn/misc/java/src/cpp/dnn_converters.hpp b/modules/dnn/misc/java/src/cpp/dnn_converters.hpp index 6a7270174c..e1f63e0a00 100644 --- a/modules/dnn/misc/java/src/cpp/dnn_converters.hpp +++ b/modules/dnn/misc/java/src/cpp/dnn_converters.hpp @@ -28,5 +28,6 @@ jobject vector_Ptr_Layer_to_List(JNIEnv* env, std::vector > List_to_vector_Ptr_Layer(JNIEnv* env, jobject list); +jobject vector_Target_to_List(JNIEnv* env, std::vector& vs); #endif /* DNN_CONVERTERS_HPP */ diff --git a/modules/dnn/misc/java/test/DnnTensorFlowTest.java b/modules/dnn/misc/java/test/DnnTensorFlowTest.java index 4e96c73e28..469573642b 100644 --- a/modules/dnn/misc/java/test/DnnTensorFlowTest.java +++ b/modules/dnn/misc/java/test/DnnTensorFlowTest.java @@ -141,4 +141,9 @@ public class DnnTensorFlowTest extends OpenCVTestCase { net = Dnn.readNetFromTensorflow(new MatOfByte(modelBuffer)); checkInceptionNet(net); } + + public void testGetAvailableTargets() { + List targets = Dnn.getAvailableTargets(Dnn.DNN_BACKEND_OPENCV); + assertTrue(targets.contains(Dnn.DNN_TARGET_CPU)); + } } diff --git a/modules/dnn/misc/python/pyopencv_dnn.hpp b/modules/dnn/misc/python/pyopencv_dnn.hpp index 34aeacb08d..69c14240c3 100644 --- a/modules/dnn/misc/python/pyopencv_dnn.hpp +++ b/modules/dnn/misc/python/pyopencv_dnn.hpp @@ -71,6 +71,12 @@ PyObject* pyopencv_from(const dnn::LayerParams& lp) return dict; } +template<> +PyObject* pyopencv_from(const std::vector &t) +{ + return pyopencv_from(std::vector(t.begin(), t.end())); +} + class pycvLayer CV_FINAL : public dnn::Layer { public: diff --git a/modules/dnn/misc/python/test/test_dnn.py b/modules/dnn/misc/python/test/test_dnn.py index 2d757b87d4..d71a9c5a15 100644 --- a/modules/dnn/misc/python/test/test_dnn.py +++ b/modules/dnn/misc/python/test/test_dnn.py @@ -117,6 +117,10 @@ class dnn_test(NewOpenCVTests): return False return True + def test_getAvailableTargets(self): + targets = cv.dnn.getAvailableTargets(cv.dnn.DNN_BACKEND_OPENCV) + self.assertTrue(cv.dnn.DNN_TARGET_CPU in targets) + def test_blobFromImage(self): np.random.seed(324) diff --git a/modules/dnn/src/darknet/darknet_io.cpp b/modules/dnn/src/darknet/darknet_io.cpp index e9938ecbb9..713624f728 100644 --- a/modules/dnn/src/darknet/darknet_io.cpp +++ b/modules/dnn/src/darknet/darknet_io.cpp @@ -556,6 +556,7 @@ namespace cv { { int kernel_size = getParam(layer_params, "size", -1); int pad = getParam(layer_params, "pad", 0); + int padding = getParam(layer_params, "padding", 0); int stride = getParam(layer_params, "stride", 1); int filters = getParam(layer_params, "filters", -1); bool batch_normalize = getParam(layer_params, "batch_normalize", 0) == 1; @@ -563,13 +564,13 @@ namespace cv { if (flipped == 1) CV_Error(cv::Error::StsNotImplemented, "Transpose the convolutional weights is not implemented"); - // correct the strange value of pad=1 for kernel_size=1 in the Darknet cfg-file - if (kernel_size < 3) pad = 0; + if (pad) + padding = kernel_size / 2; CV_Assert(kernel_size > 0 && filters > 0); CV_Assert(current_channels > 0); - setParams.setConvolution(kernel_size, pad, stride, filters, current_channels, + setParams.setConvolution(kernel_size, padding, stride, filters, current_channels, batch_normalize); current_channels = filters; diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 6b1d8bafbf..dfac30e3b3 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -109,7 +109,7 @@ public: #ifdef HAVE_INF_ENGINE static inline bool checkIETarget(Target target) { -#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2019R3) +#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R3) // Lightweight detection const std::vector devices = getCore().GetAvailableDevices(); for (std::vector::const_iterator i = devices.begin(); i != devices.end(); ++i) @@ -3098,7 +3098,9 @@ struct Net::Impl catch (const cv::Exception& e) { CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() throws exception." << - " inputs=" << is.size() << " outputs=" << os.size() << "/" << requiredOutputs); + " inputs=" << is.size() << + " outputs=" << os.size() << "/" << requiredOutputs << + " blobs=" << l->blobs.size()); for (size_t i = 0; i < is.size(); ++i) { CV_LOG_ERROR(NULL, " input[" << i << "] = " << toString(is[i])); @@ -3107,6 +3109,10 @@ struct Net::Impl { CV_LOG_ERROR(NULL, " output[" << i << "] = " << toString(os[i])); } + for (size_t i = 0; i < l->blobs.size(); ++i) + { + CV_LOG_ERROR(NULL, " blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i]))); + } CV_LOG_ERROR(NULL, "Exception message: " << e.what()); throw; } diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp index 97085cc12f..2a00880c42 100644 --- a/modules/dnn/src/ie_ngraph.cpp +++ b/modules/dnn/src/ie_ngraph.cpp @@ -323,7 +323,14 @@ void InfEngineNgraphNet::initPlugin(InferenceEngine::CNNNetwork& net) } // Some of networks can work without a library of extra layers. // OpenCV fallbacks as extensions. - ie.AddExtension(std::make_shared(), "CPU"); + try + { + ie.AddExtension(std::make_shared(), "CPU"); + } + catch(const std::exception& e) + { + CV_LOG_INFO(NULL, "DNN-IE: Can't register OpenCV custom layers extension: " << e.what()); + } #ifndef _WIN32 // Limit the number of CPU threads. if (device_name == "CPU") diff --git a/modules/dnn/src/layers/lrn_layer.cpp b/modules/dnn/src/layers/lrn_layer.cpp index ba4f71aa01..82267c9eb3 100644 --- a/modules/dnn/src/layers/lrn_layer.cpp +++ b/modules/dnn/src/layers/lrn_layer.cpp @@ -103,7 +103,7 @@ public: return bias == (int)bias; } if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { - return type == CHANNEL_NRM && bias == (int)bias; + return bias == (int)bias; } return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || @@ -471,7 +471,15 @@ public: alphaSize *= (type == SPATIAL_NRM ? size*size : size); auto& ieInpNode = nodes[0].dynamicCast()->node; - auto lrn = std::make_shared(ieInpNode, (double)alphaSize, (double)beta, (double)bias, (size_t)size); + std::vector axes; + if (type != SPATIAL_NRM) { + axes = {1}; + } else { + axes.resize(ieInpNode->get_shape().size() - 2); + std::iota(axes.begin(), axes.end(), 2); + } + auto ngraph_axes = std::make_shared(ngraph::element::i64, ngraph::Shape{axes.size()}, axes.data()); + auto lrn = std::make_shared(ieInpNode, ngraph_axes, alphaSize, beta, bias, size); return Ptr(new InfEngineNgraphNode(lrn)); } #endif // HAVE_DNN_NGRAPH diff --git a/modules/dnn/src/layers/mvn_layer.cpp b/modules/dnn/src/layers/mvn_layer.cpp index 5d0c267f49..94434957b1 100644 --- a/modules/dnn/src/layers/mvn_layer.cpp +++ b/modules/dnn/src/layers/mvn_layer.cpp @@ -119,8 +119,10 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { #ifdef HAVE_INF_ENGINE - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) return !zeroDev && (preferableTarget != DNN_TARGET_MYRIAD || eps <= 1e-7f); + else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + return true; else #endif // HAVE_INF_ENGINE return backendId == DNN_BACKEND_OPENCV; diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp index 10c4161cfe..e27be30cd3 100644 --- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp +++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp @@ -98,7 +98,7 @@ private: class SoftMaxSubgraph : public Subgraph { public: - SoftMaxSubgraph() + SoftMaxSubgraph() : axis(1) { int input = addNodeToMatch(""); int inpExp = addNodeToMatch("Exp", input); diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index df9cd97ca5..39fcf36b27 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -147,8 +147,18 @@ Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto) } else { - char* val = const_cast(tensor_proto.raw_data().c_str()); - int64_t* src = reinterpret_cast(val); + const char* val = tensor_proto.raw_data().c_str(); + // Aligned pointer is required: https://github.com/opencv/opencv/issues/16373 + // this doesn't work: typedef int64_t CV_DECL_ALIGNED(1) unaligned_int64_t; + AutoBuffer aligned_val; + if (!isAligned(val)) + { + size_t sz = tensor_proto.raw_data().size(); + aligned_val.allocate(divUp(sz, sizeof(int64_t))); + memcpy(aligned_val.data(), val, sz); + val = (const char*)aligned_val.data(); + } + const int64_t* src = reinterpret_cast(val); convertInt64ToInt32(src, dst, blob.total()); } } diff --git a/modules/dnn/src/op_inf_engine.cpp b/modules/dnn/src/op_inf_engine.cpp index 58a6d7850e..2036d3aa03 100644 --- a/modules/dnn/src/op_inf_engine.cpp +++ b/modules/dnn/src/op_inf_engine.cpp @@ -574,7 +574,7 @@ InferenceEngine::Core& getCore() #if !defined(OPENCV_DNN_IE_VPU_TYPE_DEFAULT) static bool detectMyriadX_() { -#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2019R3) +#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R3) // Lightweight detection InferenceEngine::Core& ie = getCore(); const std::vector devices = ie.GetAvailableDevices(); @@ -739,7 +739,14 @@ void InfEngineBackendNet::initPlugin(InferenceEngine::CNNNetwork& net) // Some of networks can work without a library of extra layers. #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2019R1) // OpenCV fallbacks as extensions. - ie.AddExtension(std::make_shared(), "CPU"); + try + { + ie.AddExtension(std::make_shared(), "CPU"); + } + catch(const std::exception& e) + { + CV_LOG_INFO(NULL, "DNN-IE: Can't register OpenCV custom layers extension: " << e.what()); + } #endif #ifndef _WIN32 // Limit the number of CPU threads. @@ -1068,8 +1075,14 @@ void resetMyriadDevice() #if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1) getSharedPlugins().erase("MYRIAD"); #else - // To unregister both "MYRIAD" and "HETERO:MYRIAD,CPU" plugins - getCore() = InferenceEngine::Core(); + // Unregister both "MYRIAD" and "HETERO:MYRIAD,CPU" plugins + InferenceEngine::Core& ie = getCore(); + try + { + ie.UnregisterPlugin("MYRIAD"); + ie.UnregisterPlugin("HETERO"); + } + catch (...) {} #endif #endif // HAVE_INF_ENGINE } diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index 7545b35b8e..c1c84078a9 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ b/modules/dnn/test/test_darknet_importer.cpp @@ -106,7 +106,7 @@ public: std::string cfg = findDataFile("dnn/darknet/" + name + ".cfg"); std::string model = ""; if (hasWeights) - model = findDataFile("dnn/darknet/" + name + ".weights", false); + model = findDataFile("dnn/darknet/" + name + ".weights"); checkBackend(&inp, &ref); @@ -554,6 +554,15 @@ TEST_P(Test_Darknet_layers, reorg) testDarknetLayer("reorg"); } +TEST_P(Test_Darknet_layers, convolutional) +{ + if (target == DNN_TARGET_MYRIAD) + { + default_l1 = 0.01f; + } + testDarknetLayer("convolutional", true); +} + INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_layers, dnnBackendsAndTargets()); }} // namespace diff --git a/modules/imgcodecs/src/loadsave.cpp b/modules/imgcodecs/src/loadsave.cpp index 5dcd241f8c..061bfdfc0e 100644 --- a/modules/imgcodecs/src/loadsave.cpp +++ b/modules/imgcodecs/src/loadsave.cpp @@ -419,12 +419,12 @@ imread_( const String& filename, int flags, Mat& mat ) int scale_denom = 1; if( flags > IMREAD_LOAD_GDAL ) { - if( flags & IMREAD_REDUCED_GRAYSCALE_2 ) - scale_denom = 2; - else if( flags & IMREAD_REDUCED_GRAYSCALE_4 ) - scale_denom = 4; - else if( flags & IMREAD_REDUCED_GRAYSCALE_8 ) - scale_denom = 8; + if( flags & IMREAD_REDUCED_GRAYSCALE_2 ) + scale_denom = 2; + else if( flags & IMREAD_REDUCED_GRAYSCALE_4 ) + scale_denom = 4; + else if( flags & IMREAD_REDUCED_GRAYSCALE_8 ) + scale_denom = 8; } /// set the scale_denom in the driver @@ -738,6 +738,20 @@ imdecode_( const Mat& buf, int flags, Mat& mat ) if( !decoder ) return 0; + int scale_denom = 1; + if( flags > IMREAD_LOAD_GDAL ) + { + if( flags & IMREAD_REDUCED_GRAYSCALE_2 ) + scale_denom = 2; + else if( flags & IMREAD_REDUCED_GRAYSCALE_4 ) + scale_denom = 4; + else if( flags & IMREAD_REDUCED_GRAYSCALE_8 ) + scale_denom = 8; + } + + /// set the scale_denom in the driver + decoder->setScale( scale_denom ); + if( !decoder->setSource(buf_row) ) { filename = tempfile(); @@ -816,7 +830,7 @@ imdecode_( const Mat& buf, int flags, Mat& mat ) { std::cerr << "imdecode_('" << filename << "'): can't read data: unknown exception" << std::endl << std::flush; } - decoder.release(); + if (!filename.empty()) { if (0 != remove(filename.c_str())) @@ -831,6 +845,11 @@ imdecode_( const Mat& buf, int flags, Mat& mat ) return false; } + if( decoder->setScale( scale_denom ) > 1 ) // if decoder is JpegDecoder then decoder->setScale always returns 1 + { + resize(mat, mat, Size( size.width / scale_denom, size.height / scale_denom ), 0, 0, INTER_LINEAR_EXACT); + } + return true; } diff --git a/modules/imgcodecs/test/test_precomp.hpp b/modules/imgcodecs/test/test_precomp.hpp index 4fc692cfac..3bd4221dae 100644 --- a/modules/imgcodecs/test/test_precomp.hpp +++ b/modules/imgcodecs/test/test_precomp.hpp @@ -8,4 +8,68 @@ #include "opencv2/imgcodecs.hpp" #include "opencv2/imgproc/imgproc_c.h" +namespace cv { + +static inline +void PrintTo(const ImreadModes& val, std::ostream* os) +{ + int v = val; + if (v == IMREAD_UNCHANGED && (v & IMREAD_IGNORE_ORIENTATION) != 0) + { + CV_Assert(IMREAD_UNCHANGED == -1); + *os << "IMREAD_UNCHANGED"; + return; + } + if ((v & IMREAD_COLOR) != 0) + { + CV_Assert(IMREAD_COLOR == 1); + v &= ~IMREAD_COLOR; + *os << "IMREAD_COLOR" << (v == 0 ? "" : " | "); + } + else + { + CV_Assert(IMREAD_GRAYSCALE == 0); + *os << "IMREAD_GRAYSCALE" << (v == 0 ? "" : " | "); + } + if ((v & IMREAD_ANYDEPTH) != 0) + { + v &= ~IMREAD_ANYDEPTH; + *os << "IMREAD_ANYDEPTH" << (v == 0 ? "" : " | "); + } + if ((v & IMREAD_ANYCOLOR) != 0) + { + v &= ~IMREAD_ANYCOLOR; + *os << "IMREAD_ANYCOLOR" << (v == 0 ? "" : " | "); + } + if ((v & IMREAD_LOAD_GDAL) != 0) + { + v &= ~IMREAD_LOAD_GDAL; + *os << "IMREAD_LOAD_GDAL" << (v == 0 ? "" : " | "); + } + if ((v & IMREAD_IGNORE_ORIENTATION) != 0) + { + v &= ~IMREAD_IGNORE_ORIENTATION; + *os << "IMREAD_IGNORE_ORIENTATION" << (v == 0 ? "" : " | "); + } + switch (v) + { + case IMREAD_UNCHANGED: return; + case IMREAD_GRAYSCALE: return; + case IMREAD_COLOR: return; + case IMREAD_ANYDEPTH: return; + case IMREAD_ANYCOLOR: return; + case IMREAD_LOAD_GDAL: return; + case IMREAD_REDUCED_GRAYSCALE_2: // fallthru + case IMREAD_REDUCED_COLOR_2: *os << "REDUCED_2"; return; + case IMREAD_REDUCED_GRAYSCALE_4: // fallthru + case IMREAD_REDUCED_COLOR_4: *os << "REDUCED_4"; return; + case IMREAD_REDUCED_GRAYSCALE_8: // fallthru + case IMREAD_REDUCED_COLOR_8: *os << "REDUCED_8"; return; + case IMREAD_IGNORE_ORIENTATION: return; + } // don't use "default:" to emit compiler warnings + *os << "IMREAD_UNKNOWN(" << (int)v << ")"; +} + +} // namespace + #endif diff --git a/modules/imgcodecs/test/test_read_write.cpp b/modules/imgcodecs/test/test_read_write.cpp index f0f4139610..6396200ec9 100644 --- a/modules/imgcodecs/test/test_read_write.cpp +++ b/modules/imgcodecs/test/test_read_write.cpp @@ -5,6 +5,95 @@ namespace opencv_test { namespace { +/* < , > */ +typedef tuple< tuple, tuple > Imgcodecs_Resize_t; + +typedef testing::TestWithParam< Imgcodecs_Resize_t > Imgcodecs_Resize; + +/* resize_flag_and_dims = */ +const tuple resize_flag_and_dims[] = +{ + make_tuple(IMREAD_UNCHANGED, 1), + make_tuple(IMREAD_REDUCED_GRAYSCALE_2, 2), + make_tuple(IMREAD_REDUCED_GRAYSCALE_4, 4), + make_tuple(IMREAD_REDUCED_GRAYSCALE_8, 8), + make_tuple(IMREAD_REDUCED_COLOR_2, 2), + make_tuple(IMREAD_REDUCED_COLOR_4, 4), + make_tuple(IMREAD_REDUCED_COLOR_8, 8) +}; + +const tuple images[] = +{ +#ifdef HAVE_JPEG + make_tuple("../cv/imgproc/stuff.jpg", Size(640, 480)), +#endif +#ifdef HAVE_PNG + make_tuple("../cv/shared/pic1.png", Size(400, 300)), +#endif +}; + +TEST_P(Imgcodecs_Resize, imread_reduce_flags) +{ + const string file_name = findDataFile(get<0>(get<0>(GetParam()))); + const Size imageSize = get<1>(get<0>(GetParam())); + + const int imread_flag = get<0>(get<1>(GetParam())); + const int scale = get<1>(get<1>(GetParam())); + + const int cols = imageSize.width / scale; + const int rows = imageSize.height / scale; + { + Mat img = imread(file_name, imread_flag); + ASSERT_FALSE(img.empty()); + EXPECT_EQ(cols, img.cols); + EXPECT_EQ(rows, img.rows); + } +} + +//================================================================================================== + +TEST_P(Imgcodecs_Resize, imdecode_reduce_flags) +{ + const string file_name = findDataFile(get<0>(get<0>(GetParam()))); + const Size imageSize = get<1>(get<0>(GetParam())); + + const int imread_flag = get<0>(get<1>(GetParam())); + const int scale = get<1>(get<1>(GetParam())); + + const int cols = imageSize.width / scale; + const int rows = imageSize.height / scale; + + const std::ios::openmode mode = std::ios::in | std::ios::binary; + std::ifstream ifs(file_name.c_str(), mode); + ASSERT_TRUE(ifs.is_open()); + + ifs.seekg(0, std::ios::end); + const size_t sz = static_cast(ifs.tellg()); + ifs.seekg(0, std::ios::beg); + + std::vector content(sz); + ifs.read((char*)content.data(), sz); + ASSERT_FALSE(ifs.fail()); + + { + Mat img = imdecode(Mat(content), imread_flag); + ASSERT_FALSE(img.empty()); + EXPECT_EQ(cols, img.cols); + EXPECT_EQ(rows, img.rows); + } +} + +//================================================================================================== + +INSTANTIATE_TEST_CASE_P(/*nothing*/, Imgcodecs_Resize, + testing::Combine( + testing::ValuesIn(images), + testing::ValuesIn(resize_flag_and_dims) + ) + ); + +//================================================================================================== + TEST(Imgcodecs_Image, read_write_bmp) { const size_t IMAGE_COUNT = 10; diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp index 861a1f2eac..7596217b73 100644 --- a/modules/imgproc/src/resize.cpp +++ b/modules/imgproc/src/resize.cpp @@ -1526,7 +1526,7 @@ struct HResizeLinearVec_X4 struct HResizeLinearVecU8_X4 { int operator()(const uchar** src, int** dst, int count, const int* xofs, - const short* alpha/*[xmax]*/, int smax, int /*dmax*/, int cn, int /*xmin*/, int xmax) const + const short* alpha/*[xmax]*/, int /*smax*/, int dmax, int cn, int /*xmin*/, int xmax) const { int dx = 0, k = 0; @@ -1612,17 +1612,11 @@ struct HResizeLinearVecU8_X4 } else if(cn == 3) { - int len0 = xmax - cn; - - /* This may need to trim 1 or more extra units depending on the amount of - scaling. Test until we find the first value which we know cannot overrun. */ - while (len0 >= cn && - xofs[len0 - cn] + cn >= smax - cn // check access: v_load_expand_q(S+xofs[dx]+cn) - ) - { - len0 -= cn; - } - CV_DbgAssert(len0 <= 0 || len0 >= cn); + /* Peek at the last x offset to find the maximal s offset. We know the loop + will terminate prior to value which may be 1 or more elements prior to the + final valid offset. xofs[] is constucted to be an array of increasingly + large offsets (i.e xofs[x] <= xofs[x+1] for x < xmax). */ + int smax = xofs[dmax-cn]; for( ; k <= (count - 2); k+=2 ) { @@ -1631,7 +1625,7 @@ struct HResizeLinearVecU8_X4 const uchar *S1 = src[k+1]; int *D1 = dst[k+1]; - for( dx = 0; dx < len0; dx += cn ) + for( dx = 0; (xofs[dx] + cn) < smax; dx += cn ) { v_int16x8 a = v_load(alpha+dx*2); v_store(&D0[dx], v_dotprod(v_reinterpret_as_s16(v_load_expand_q(S0+xofs[dx]) | (v_load_expand_q(S0+xofs[dx]+cn)<<16)), a)); @@ -1642,12 +1636,14 @@ struct HResizeLinearVecU8_X4 { const uchar *S = src[k]; int *D = dst[k]; - for( dx = 0; dx < len0; dx += cn ) + for( dx = 0; (xofs[dx] + cn) < smax; dx += cn ) { v_int16x8 a = v_load(alpha+dx*2); v_store(&D[dx], v_dotprod(v_reinterpret_as_s16(v_load_expand_q(S+xofs[dx]) | (v_load_expand_q(S+xofs[dx]+cn)<<16)), a)); } } + /* Debug check to ensure truthiness that we never vector the final value. */ + CV_DbgAssert(dx < dmax); } else if(cn == 4) { diff --git a/modules/photo/src/seamless_cloning.cpp b/modules/photo/src/seamless_cloning.cpp index ee0045334f..d09203577b 100644 --- a/modules/photo/src/seamless_cloning.cpp +++ b/modules/photo/src/seamless_cloning.cpp @@ -51,8 +51,8 @@ static Mat checkMask(InputArray _mask, Size size) { Mat mask = _mask.getMat(); Mat gray; - if (mask.channels() == 3) - cvtColor(mask, gray, COLOR_BGR2GRAY); + if (mask.channels() > 1) + cvtColor(mask, gray, COLOR_BGRA2GRAY); else { if (mask.empty()) diff --git a/modules/ts/src/ts.cpp b/modules/ts/src/ts.cpp index d710b2d0c6..2c6f687bb5 100644 --- a/modules/ts/src/ts.cpp +++ b/modules/ts/src/ts.cpp @@ -1008,10 +1008,10 @@ static std::string findData(const std::string& relative_path, bool required, boo CHECK_FILE_WITH_PREFIX(prefix, result_); if (!required && !result_.empty()) { - std::cout << "TEST ERROR: Don't use 'optional' findData() for " << relative_path << std::endl; static bool checkOptionalFlag = cv::utils::getConfigurationParameterBool("OPENCV_TEST_CHECK_OPTIONAL_DATA", false); if (checkOptionalFlag) { + std::cout << "TEST ERROR: Don't use 'optional' findData() for " << relative_path << std::endl; CV_Assert(required || result_.empty()); } }