diff --git a/3rdparty/carotene/CMakeLists.txt b/3rdparty/carotene/CMakeLists.txt index bd26a2d7ef..3d49a2def6 100644 --- a/3rdparty/carotene/CMakeLists.txt +++ b/3rdparty/carotene/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8.11 FATAL_ERROR) +cmake_minimum_required(VERSION ${MIN_VER_CMAKE} FATAL_ERROR) project(Carotene) diff --git a/3rdparty/carotene/hal/CMakeLists.txt b/3rdparty/carotene/hal/CMakeLists.txt index 592771c676..70c40180e8 100644 --- a/3rdparty/carotene/hal/CMakeLists.txt +++ b/3rdparty/carotene/hal/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8.8 FATAL_ERROR) +cmake_minimum_required(VERSION ${MIN_VER_CMAKE} FATAL_ERROR) include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) diff --git a/cmake/OpenCVCompilerOptimizations.cmake b/cmake/OpenCVCompilerOptimizations.cmake index 1c40ee7a1b..cc0b5f8216 100644 --- a/cmake/OpenCVCompilerOptimizations.cmake +++ b/cmake/OpenCVCompilerOptimizations.cmake @@ -46,7 +46,7 @@ set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3;AVX_512F") list(APPEND CPU_ALL_OPTIMIZATIONS "AVX512_COMMON;AVX512_KNL;AVX512_KNM;AVX512_SKX;AVX512_CNL;AVX512_CLX;AVX512_ICL") -list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16) +list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16 NEON_DOTPROD) list(APPEND CPU_ALL_OPTIMIZATIONS MSA) list(APPEND CPU_ALL_OPTIMIZATIONS VSX VSX3) list(APPEND CPU_ALL_OPTIMIZATIONS RVV) @@ -329,6 +329,7 @@ if(X86 OR X86_64) elseif(ARM OR AARCH64) ocv_update(CPU_NEON_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_neon.cpp") ocv_update(CPU_FP16_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp") + ocv_update(CPU_NEON_DOTPROD_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_dotprod.cpp") if(NOT AARCH64) ocv_update(CPU_KNOWN_OPTIMIZATIONS "VFPV3;NEON;FP16") if(NOT MSVC) @@ -340,9 +341,11 @@ elseif(ARM OR AARCH64) endif() ocv_update(CPU_FP16_IMPLIES "NEON") else() - ocv_update(CPU_KNOWN_OPTIMIZATIONS "NEON;FP16") + 
ocv_update(CPU_KNOWN_OPTIMIZATIONS "NEON;FP16;NEON_DOTPROD") ocv_update(CPU_NEON_FLAGS_ON "") ocv_update(CPU_FP16_IMPLIES "NEON") + ocv_update(CPU_NEON_DOTPROD_FLAGS_ON "-march=armv8.2-a+dotprod") + ocv_update(CPU_NEON_DOTPROD_IMPLIES "NEON") set(CPU_BASELINE "NEON;FP16" CACHE STRING "${HELP_CPU_BASELINE}") endif() elseif(MIPS) diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index 4f5c353980..53c1d8551e 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -136,7 +136,7 @@ if(CV_GCC OR CV_CLANG) endif() add_extra_compiler_option(-Wsign-promo) add_extra_compiler_option(-Wuninitialized) - if(CV_GCC AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 6.0) AND (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0)) + if(CV_GCC AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 6.0) AND (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0 OR ARM)) add_extra_compiler_option(-Wno-psabi) endif() if(HAVE_CXX11) diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake index 10f1288141..acc101396c 100644 --- a/cmake/OpenCVDetectCUDA.cmake +++ b/cmake/OpenCVDetectCUDA.cmake @@ -253,12 +253,13 @@ if(CUDA_FOUND) endif() if(NOT _nvcc_res EQUAL 0) message(STATUS "Automatic detection of CUDA generation failed. 
Going to build for all known architectures.") - # TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0) + # TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0) Orin (8.7) ocv_filter_available_architecture(__cuda_arch_bin 5.3 6.2 7.2 7.0 + 8.7 ) else() set(__cuda_arch_bin "${_nvcc_out}") diff --git a/cmake/OpenCVDetectHalide.cmake b/cmake/OpenCVDetectHalide.cmake index 4828c299ae..dc484a7d3d 100644 --- a/cmake/OpenCVDetectHalide.cmake +++ b/cmake/OpenCVDetectHalide.cmake @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION ${MIN_VER_CMAKE}) if(" ${HALIDE_ROOT_DIR}" STREQUAL " ") unset(HALIDE_ROOT_DIR CACHE) diff --git a/cmake/checks/cpu_dotprod.cpp b/cmake/checks/cpu_dotprod.cpp new file mode 100644 index 0000000000..4f39c50659 --- /dev/null +++ b/cmake/checks/cpu_dotprod.cpp @@ -0,0 +1,24 @@ +#include <stdio.h> + +#if defined __GNUC__ && (defined __arm__ || defined __aarch64__) +#include "arm_neon.h" +int test() +{ + const unsigned int src[] = { 0, 0, 0, 0 }; + unsigned int dst[4]; + uint32x4_t v_src = *(uint32x4_t*)src; + uint8x16_t v_m0 = *(uint8x16_t*)src; + uint8x16_t v_m1 = *(uint8x16_t*)src; + uint32x4_t v_dst = vdotq_u32(v_src, v_m0, v_m1); + *(uint32x4_t*)dst = v_dst; + return (int)dst[0]; +} +#else +#error "DOTPROD is not supported" +#endif + +int main() +{ + printf("%d\n", test()); + return 0; +} diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp index 35638b44f4..893eb22761 100644 --- a/modules/calib3d/include/opencv2/calib3d.hpp +++ b/modules/calib3d/include/opencv2/calib3d.hpp @@ -2692,7 +2692,7 @@ CV_EXPORTS_W int recoverPose( InputArray points1, InputArray points2, InputOutputArray mask = noArray()); /** @brief Recovers the relative camera rotation and the translation from an estimated essential -matrix and the corresponding points in two images, using cheirality check. Returns the number of +matrix and the corresponding points in two images, using chirality check. 
Returns the number of inliers that pass the check. @param E The input essential matrix. @@ -2710,11 +2710,11 @@ described below. therefore is only known up to scale, i.e. t is the direction of the translation vector and has unit length. @param mask Input/output mask for inliers in points1 and points2. If it is not empty, then it marks -inliers in points1 and points2 for then given essential matrix E. Only these inliers will be used to -recover pose. In the output mask only inliers which pass the cheirality check. +inliers in points1 and points2 for the given essential matrix E. Only these inliers will be used to +recover pose. In the output mask only inliers which pass the chirality check. This function decomposes an essential matrix using @ref decomposeEssentialMat and then verifies -possible pose hypotheses by doing cheirality check. The cheirality check means that the +possible pose hypotheses by doing chirality check. The chirality check means that the triangulated 3D points should have positive depth. Some details can be found in @cite Nister03. This function can be used to process the output E and mask from @ref findEssentialMat. In this @@ -2761,8 +2761,8 @@ length. are feature points from cameras with same focal length and principal point. @param pp principal point of the camera. @param mask Input/output mask for inliers in points1 and points2. If it is not empty, then it marks -inliers in points1 and points2 for then given essential matrix E. Only these inliers will be used to -recover pose. In the output mask only inliers which pass the cheirality check. +inliers in points1 and points2 for the given essential matrix E. Only these inliers will be used to +recover pose. In the output mask only inliers which pass the chirality check. This function differs from the one above that it computes camera intrinsic matrix from focal length and principal point: @@ -2797,12 +2797,12 @@ length. 
@param distanceThresh threshold distance which is used to filter out far away points (i.e. infinite points). @param mask Input/output mask for inliers in points1 and points2. If it is not empty, then it marks -inliers in points1 and points2 for then given essential matrix E. Only these inliers will be used to -recover pose. In the output mask only inliers which pass the cheirality check. +inliers in points1 and points2 for the given essential matrix E. Only these inliers will be used to +recover pose. In the output mask only inliers which pass the chirality check. @param triangulatedPoints 3D points which were reconstructed by triangulation. This function differs from the one above that it outputs the triangulated 3D point that are used for -the cheirality check. +the chirality check. */ CV_EXPORTS_W int recoverPose( InputArray E, InputArray points1, InputArray points2, InputArray cameraMatrix, OutputArray R, OutputArray t, double distanceThresh, InputOutputArray mask = noArray(), diff --git a/modules/calib3d/src/calibinit.cpp b/modules/calib3d/src/calibinit.cpp index 9e96802388..7b5e1db9f9 100644 --- a/modules/calib3d/src/calibinit.cpp +++ b/modules/calib3d/src/calibinit.cpp @@ -1232,7 +1232,7 @@ int ChessBoardDetector::cleanFoundConnectedQuads(std::vector& q centers[i] = ci; center += ci; } - center.x *= (1.0f / quad_count); + center *= (1.0f / quad_count); // If we still have more quadrangles than we should, // we try to eliminate bad ones based on minimizing the bounding box. 
@@ -1256,7 +1256,7 @@ int ChessBoardDetector::cleanFoundConnectedQuads(std::vector& q Mat points(1, quad_count, CV_32FC2, &centers[0]); cv::convexHull(points, hull, true); centers[skip] = temp; - double hull_area = contourArea(hull, true); + double hull_area = contourArea(hull, false); // remember smallest box area if (hull_area < min_box_area) @@ -1298,6 +1298,7 @@ int ChessBoardDetector::cleanFoundConnectedQuads(std::vector& q quad_group[min_box_area_index] = quad_group[quad_count]; centers[min_box_area_index] = centers[quad_count]; } + quad_group.resize(quad_count); return quad_count; } diff --git a/modules/calib3d/src/five-point.cpp b/modules/calib3d/src/five-point.cpp index d947334142..735f8e6b85 100644 --- a/modules/calib3d/src/five-point.cpp +++ b/modules/calib3d/src/five-point.cpp @@ -601,7 +601,7 @@ int cv::recoverPose( InputArray E, InputArray _points1, InputArray _points2, P3(Range::all(), Range(0, 3)) = R1 * 1.0; P3.col(3) = -t * 1.0; P4(Range::all(), Range(0, 3)) = R2 * 1.0; P4.col(3) = -t * 1.0; - // Do the cheirality check. + // Do the chirality check. // Notice here a threshold dist is used to filter // out far away points (i.e. infinite points) since // their depth may vary between positive and negative. 
diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index 0ed2ad59b1..eab909843b 100644 --- a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -6,7 +6,7 @@ ocv_add_dispatched_file(arithm SSE2 SSE4_1 AVX2 VSX3) ocv_add_dispatched_file(convert SSE2 AVX2 VSX3) ocv_add_dispatched_file(convert_scale SSE2 AVX2) ocv_add_dispatched_file(count_non_zero SSE2 AVX2) -ocv_add_dispatched_file(matmul SSE2 SSE4_1 AVX2 AVX512_SKX) +ocv_add_dispatched_file(matmul SSE2 SSE4_1 AVX2 AVX512_SKX NEON_DOTPROD) ocv_add_dispatched_file(mean SSE2 AVX2) ocv_add_dispatched_file(merge SSE2 AVX2) ocv_add_dispatched_file(split SSE2 AVX2) diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h index ab5a67d4c8..12e4cb47b8 100644 --- a/modules/core/include/opencv2/core/cv_cpu_dispatch.h +++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h @@ -79,6 +79,10 @@ # endif # define CV_FP16 1 #endif +#ifdef CV_CPU_COMPILE_NEON_DOTPROD +# include <arm_neon.h> +# define CV_NEON_DOT 1 +#endif #ifdef CV_CPU_COMPILE_AVX2 # include <immintrin.h> # define CV_AVX2 1 diff --git a/modules/core/include/opencv2/core/cv_cpu_helper.h b/modules/core/include/opencv2/core/cv_cpu_helper.h index 39ae0b91f7..91b853de0c 100644 --- a/modules/core/include/opencv2/core/cv_cpu_helper.h +++ b/modules/core/include/opencv2/core/cv_cpu_helper.h @@ -420,6 +420,27 @@ #endif #define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...) 
CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON_DOTPROD +# define CV_TRY_NEON_DOTPROD 1 +# define CV_CPU_FORCE_NEON_DOTPROD 1 +# define CV_CPU_HAS_SUPPORT_NEON_DOTPROD 1 +# define CV_CPU_CALL_NEON_DOTPROD(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_NEON_DOTPROD_(fn, args) return (opt_NEON_DOTPROD::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON_DOTPROD +# define CV_TRY_NEON_DOTPROD 1 +# define CV_CPU_FORCE_NEON_DOTPROD 0 +# define CV_CPU_HAS_SUPPORT_NEON_DOTPROD (cv::checkHardwareSupport(CV_CPU_NEON_DOTPROD)) +# define CV_CPU_CALL_NEON_DOTPROD(fn, args) if (CV_CPU_HAS_SUPPORT_NEON_DOTPROD) return (opt_NEON_DOTPROD::fn args) +# define CV_CPU_CALL_NEON_DOTPROD_(fn, args) if (CV_CPU_HAS_SUPPORT_NEON_DOTPROD) return (opt_NEON_DOTPROD::fn args) +#else +# define CV_TRY_NEON_DOTPROD 0 +# define CV_CPU_FORCE_NEON_DOTPROD 0 +# define CV_CPU_HAS_SUPPORT_NEON_DOTPROD 0 +# define CV_CPU_CALL_NEON_DOTPROD(fn, args) +# define CV_CPU_CALL_NEON_DOTPROD_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_NEON_DOTPROD(fn, args, mode, ...) 
CV_CPU_CALL_NEON_DOTPROD(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_MSA # define CV_TRY_MSA 1 # define CV_CPU_FORCE_MSA 1 diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h index f785f32b6e..21e3792162 100644 --- a/modules/core/include/opencv2/core/cvdef.h +++ b/modules/core/include/opencv2/core/cvdef.h @@ -268,6 +268,7 @@ namespace cv { #define CV_CPU_AVX_5124FMAPS 27 #define CV_CPU_NEON 100 +#define CV_CPU_NEON_DOTPROD 101 #define CV_CPU_MSA 150 @@ -324,6 +325,7 @@ enum CpuFeatures { CPU_AVX_5124FMAPS = 27, CPU_NEON = 100, + CPU_NEON_DOTPROD = 101, CPU_MSA = 150, diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp index 28cf813379..0d6fde5c41 100644 --- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -78,8 +78,6 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN #define CV_NEON_AARCH64 0 #endif -// TODO -#define CV_NEON_DOT 0 //////////// Utils //////////// @@ -665,11 +663,22 @@ inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64 } // 8 >> 32 +#ifdef CV_NEON_DOT +#define OPENCV_HAL_IMPL_NEON_DOT_PRODUCT_OP(_Tpvec1, _Tpvec2, suffix) \ +inline _Tpvec1 v_dotprod_expand(const _Tpvec2& a, const _Tpvec2& b) \ +{ \ + return _Tpvec1(vdotq_##suffix(vdupq_n_##suffix(0), a.val, b.val));\ +} \ +inline _Tpvec1 v_dotprod_expand(const _Tpvec2& a, const _Tpvec2& b, const _Tpvec1& c) \ +{ \ + return _Tpvec1(vdotq_##suffix(c.val, a.val, b.val)); \ +} + +OPENCV_HAL_IMPL_NEON_DOT_PRODUCT_OP(v_uint32x4, v_uint8x16, u32) +OPENCV_HAL_IMPL_NEON_DOT_PRODUCT_OP(v_int32x4, v_int8x16, s32) +#else inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) { -#if CV_NEON_DOT - return v_uint32x4(vdotq_u32(vdupq_n_u32(0), a.val, b.val)); 
-#else const uint8x16_t zero = vreinterpretq_u8_u32(vdupq_n_u32(0)); const uint8x16_t mask = vreinterpretq_u8_u32(vdupq_n_u32(0x00FF00FF)); const uint16x8_t zero32 = vreinterpretq_u16_u32(vdupq_n_u32(0)); @@ -685,23 +694,15 @@ inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) uint32x4_t s1 = vaddq_u32(vshrq_n_u32(vreinterpretq_u32_u16(even), 16), vshrq_n_u32(vreinterpretq_u32_u16(odd), 16)); return v_uint32x4(vaddq_u32(s0, s1)); -#endif } inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) { -#if CV_NEON_DOT - return v_uint32x4(vdotq_u32(c.val, a.val, b.val)); -#else return v_dotprod_expand(a, b) + c; -#endif } inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) { -#if CV_NEON_DOT - return v_int32x4(vdotq_s32(vdupq_n_s32(0), a.val, b.val)); -#else int16x8_t p0 = vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val)); int16x8_t p1 = vmull_s8(vget_high_s8(a.val), vget_high_s8(b.val)); int16x8_t uzp1, uzp2; @@ -710,18 +711,13 @@ inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) int16x4_t uzpl1, uzpl2; _v128_unzip(vget_low_s16(sum), vget_high_s16(sum), uzpl1, uzpl2); return v_int32x4(vaddl_s16(uzpl1, uzpl2)); -#endif } inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) { -#if CV_NEON_DOT - return v_int32x4(vdotq_s32(c.val, a.val, b.val)); -#else return v_dotprod_expand(a, b) + c; -#endif } - +#endif // 16 >> 64 inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) { @@ -830,45 +826,44 @@ inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_ } // 8 >> 32 +#ifdef CV_NEON_DOT +#define OPENCV_HAL_IMPL_NEON_DOT_PRODUCT_FAST_OP(_Tpvec1, _Tpvec2, suffix) \ +inline _Tpvec1 v_dotprod_expand_fast(const _Tpvec2& a, const _Tpvec2& b) \ +{ \ + return v_dotprod_expand(a, b); \ +} \ +inline _Tpvec1 v_dotprod_expand_fast(const _Tpvec2& a, const _Tpvec2& b, const _Tpvec1& c) \ +{ \ + 
return v_dotprod_expand(a, b, c); \ +} + +OPENCV_HAL_IMPL_NEON_DOT_PRODUCT_FAST_OP(v_uint32x4, v_uint8x16, u32) +OPENCV_HAL_IMPL_NEON_DOT_PRODUCT_FAST_OP(v_int32x4, v_int8x16, s32) +#else inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) { -#if CV_NEON_DOT - return v_uint32x4(vdotq_u32(vdupq_n_u32(0), a.val, b.val)); -#else uint16x8_t p0 = vmull_u8(vget_low_u8(a.val), vget_low_u8(b.val)); uint16x8_t p1 = vmull_u8(vget_high_u8(a.val), vget_high_u8(b.val)); uint32x4_t s0 = vaddl_u16(vget_low_u16(p0), vget_low_u16(p1)); uint32x4_t s1 = vaddl_u16(vget_high_u16(p0), vget_high_u16(p1)); return v_uint32x4(vaddq_u32(s0, s1)); -#endif } inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) { -#if CV_NEON_DOT - return v_uint32x4(vdotq_u32(c.val, a.val, b.val)); -#else return v_dotprod_expand_fast(a, b) + c; -#endif } inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) { -#if CV_NEON_DOT - return v_int32x4(vdotq_s32(vdupq_n_s32(0), a.val, b.val)); -#else int16x8_t prod = vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val)); prod = vmlal_s8(prod, vget_high_s8(a.val), vget_high_s8(b.val)); return v_int32x4(vaddl_s16(vget_low_s16(prod), vget_high_s16(prod))); -#endif } inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) { -#if CV_NEON_DOT - return v_int32x4(vdotq_s32(c.val, a.val, b.val)); -#else return v_dotprod_expand_fast(a, b) + c; -#endif } +#endif // 16 >> 64 inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) diff --git a/modules/core/src/buffer_area.cpp b/modules/core/src/buffer_area.cpp index 2fe9d782ae..91e6cb966f 100644 --- a/modules/core/src/buffer_area.cpp +++ b/modules/core/src/buffer_area.cpp @@ -29,8 +29,7 @@ public: } void cleanup() const { - CV_Assert(ptr && *ptr); - *ptr = 0; + CV_DbgAssert(ptr); if (raw_mem) fastFree(raw_mem); } diff --git a/modules/core/src/system.cpp 
b/modules/core/src/system.cpp index cac5cd11f1..4e7e1b7ea0 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -305,6 +305,9 @@ DECLARE_CV_CPUID_X86 #endif #endif +#if defined CV_CXX11 + #include <chrono> +#endif namespace cv { @@ -414,6 +417,7 @@ struct HWFeatures g_hwFeatureNames[CPU_AVX_5124FMAPS] = "AVX5124FMAPS"; g_hwFeatureNames[CPU_NEON] = "NEON"; + g_hwFeatureNames[CPU_NEON_DOTPROD] = "NEON_DOTPROD"; g_hwFeatureNames[CPU_VSX] = "VSX"; g_hwFeatureNames[CPU_VSX3] = "VSX3"; @@ -561,6 +565,24 @@ struct HWFeatures #ifdef __aarch64__ have[CV_CPU_NEON] = true; have[CV_CPU_FP16] = true; + int cpufile = open("/proc/self/auxv", O_RDONLY); + + if (cpufile >= 0) + { + Elf64_auxv_t auxv; + const size_t size_auxv_t = sizeof(auxv); + + while ((size_t)read(cpufile, &auxv, size_auxv_t) == size_auxv_t) + { + if (auxv.a_type == AT_HWCAP) + { + have[CV_CPU_NEON_DOTPROD] = (auxv.a_un.a_val & (1 << 20)) != 0; + break; + } + } + + close(cpufile); + } #elif defined __arm__ && defined __ANDROID__ #if defined HAVE_CPUFEATURES CV_LOG_INFO(NULL, "calling android_getCpuFeatures() ..."); @@ -853,7 +875,10 @@ bool useOptimized(void) int64 getTickCount(void) { -#if defined _WIN32 || defined WINCE +#if defined CV_CXX11 + std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now(); + return (int64)now.time_since_epoch().count(); +#elif defined _WIN32 || defined WINCE LARGE_INTEGER counter; QueryPerformanceCounter( &counter ); return (int64)counter.QuadPart; @@ -872,7 +897,11 @@ int64 getTickCount(void) double getTickFrequency(void) { -#if defined _WIN32 || defined WINCE +#if defined CV_CXX11 + using clock_period_t = std::chrono::steady_clock::duration::period; + double clock_freq = clock_period_t::den / clock_period_t::num; + return clock_freq; +#elif defined _WIN32 || defined WINCE LARGE_INTEGER freq; QueryPerformanceFrequency(&freq); return (double)freq.QuadPart; diff --git a/modules/core/test/test_utils.cpp b/modules/core/test/test_utils.cpp index 
c31ca75667..a43ea78381 100644 --- a/modules/core/test/test_utils.cpp +++ b/modules/core/test/test_utils.cpp @@ -408,9 +408,6 @@ TEST_P(BufferArea, basic) EXPECT_EQ((double)0, dbl_ptr[i]); } } - EXPECT_TRUE(int_ptr == NULL); - EXPECT_TRUE(uchar_ptr == NULL); - EXPECT_TRUE(dbl_ptr == NULL); } TEST_P(BufferArea, align) @@ -447,10 +444,6 @@ TEST_P(BufferArea, align) } } } - for (size_t i = 0; i < CNT; ++i) - { - EXPECT_TRUE(buffers[i] == NULL); - } } TEST_P(BufferArea, default_align) diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp index d727734cf3..5338ab2215 100644 --- a/modules/dnn/src/layers/scale_layer.cpp +++ b/modules/dnn/src/layers/scale_layer.cpp @@ -132,6 +132,16 @@ public: if (hasWeights && hasBias) CV_CheckEQ(weights.total(), bias.total(), "Incompatible weights/bias blobs"); + if (weights.total() == 1) + { + // The total() of bias should be same as weights. + if (hasBias) + inpBlob.convertTo(outBlob, CV_32F, weights.at<float>(0), bias.at<float>(0)); + else + inpBlob.convertTo(outBlob, CV_32F, weights.at<float>(0)); + return; + } + int endAxis; for (endAxis = axis + 1; endAxis <= inpBlob.dims; ++endAxis) { diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 41949257ef..1d6ce96612 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -2026,6 +2026,8 @@ void ONNXImporter::parseMatMul(LayerParams& layerParams, const opencv_onnx::Node void findBroadAxis(const MatShape& broadShape, const MatShape& outShape, size_t& axis, int& broadAxis) { + // Currently, this function can only complete 1-dimensional expansion of broadShape. + // If there are two dimensions in broadShape that need to be expanded, it will fail. 
const size_t diff = outShape.size() - broadShape.size(); // find the first non-one element of the broadcasting shape diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 81f66e4598..cbeb059ea7 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -1060,6 +1060,8 @@ TEST_P(Test_ONNX_layers, Div) normAssert(ref, out, "", default_l1, default_lInf); expectNoFallbacksFromIE(net); expectNoFallbacksFromCUDA(net); + + testONNXModels("div_test_1x1",npy, 0, 0, false, true, 2); } TEST_P(Test_ONNX_layers, DynamicReshape) diff --git a/modules/features2d/src/sift.simd.hpp b/modules/features2d/src/sift.simd.hpp index 60129b1535..3d809f67ed 100644 --- a/modules/features2d/src/sift.simd.hpp +++ b/modules/features2d/src/sift.simd.hpp @@ -981,11 +981,20 @@ else // CV_8U __pack01 = v_pack_u(v_round(__dst0 * __nrm2), v_round(__dst1 * __nrm2)); v_pack_store(dst + k, __pack01); } +#endif + +#if defined(__GNUC__) && __GNUC__ >= 9 +// avoid warning "iteration 7 invokes undefined behavior" on Linux ARM64 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Waggressive-loop-optimizations" #endif for( ; k < len; k++ ) { dst[k] = saturate_cast<uchar>(rawDst[k]*nrm2); } +#if defined(__GNUC__) && __GNUC__ >= 9 +#pragma GCC diagnostic pop +#endif } #else float* dst = dstMat.ptr<float>(row); diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index 3e0180ab6a..ab64ffe6dd 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -78,11 +78,15 @@ Input depth (src.depth()) | Output depth (ddepth) --------------------------|---------------------- CV_8U | -1/CV_16S/CV_32F/CV_64F CV_16U/CV_16S | -1/CV_32F/CV_64F -CV_32F | -1/CV_32F/CV_64F +CV_32F | -1/CV_32F CV_64F | -1/CV_64F @note when ddepth=-1, the output image will have the same depth as the source. 
+@note if you need double floating-point accuracy and using single floating-point input data +(CV_32F input and CV_64F output depth combination), you can use @ref Mat.convertTo to convert +the input data to the desired precision. + @defgroup imgproc_transform Geometric Image Transformations The functions in this section perform various geometrical transformations of 2D images. They do not @@ -1792,7 +1796,7 @@ with the following \f$3 \times 3\f$ aperture: @param src Source image. @param dst Destination image of the same size and the same number of channels as src . -@param ddepth Desired depth of the destination image. +@param ddepth Desired depth of the destination image, see @ref filter_depths "combinations". @param ksize Aperture size used to compute the second-derivative filters. See #getDerivKernels for details. The size must be positive and odd. @param scale Optional scale factor for the computed Laplacian values. By default, no scaling is @@ -2279,7 +2283,7 @@ case of multi-channel images, each channel is processed independently. @param src input image; the number of channels can be arbitrary, but the depth should be one of CV_8U, CV_16U, CV_16S, CV_32F or CV_64F. @param dst output image of the same size and type as src. -@param kernel structuring element used for dilation; if elemenat=Mat(), a 3 x 3 rectangular +@param kernel structuring element used for dilation; if element=Mat(), a 3 x 3 rectangular structuring element is used. Kernel can be created using #getStructuringElement @param anchor position of the anchor within the element; default value (-1, -1) means that the anchor is at the element center. @@ -2809,7 +2813,7 @@ It makes possible to do a fast blurring or fast block correlation with a variabl example. In case of multi-channel images, sums for each channel are accumulated independently. As a practical example, the next figure shows the calculation of the integral of a straight -rectangle Rect(3,3,3,2) and of a tilted rectangle Rect(5,1,2,3) . 
The selected pixels in the +rectangle Rect(4,4,3,2) and of a tilted rectangle Rect(5,1,2,3) . The selected pixels in the original image are shown, as well as the relative pixels in the integral images sum and tilted . ![integral calculation example](pics/integral.png) @@ -3174,7 +3178,14 @@ CV_EXPORTS void calcHist( const Mat* images, int nimages, const int* histSize, const float** ranges, bool uniform = true, bool accumulate = false ); -/** @overload */ +/** @overload + +this variant supports only uniform histograms. + +ranges argument is either empty vector or a flattened vector of histSize.size()*2 elements +(histSize.size() element pairs). The first and second elements of each pair specify the lower and +upper boundaries. +*/ CV_EXPORTS_W void calcHist( InputArrayOfArrays images, const std::vector<int>& channels, InputArray mask, OutputArray hist, diff --git a/modules/imgproc/src/drawing.cpp b/modules/imgproc/src/drawing.cpp index 5e31482dfa..c3435527cf 100644 --- a/modules/imgproc/src/drawing.cpp +++ b/modules/imgproc/src/drawing.cpp @@ -1058,7 +1058,7 @@ EllipseEx( Mat& img, Point2l center, Size2l axes, * Polygons filling * \****************************************************************************************/ -static inline void ICV_HLINE_X(uchar* ptr, int xl, int xr, const uchar* color, int pix_size) +static inline void ICV_HLINE_X(uchar* ptr, int64_t xl, int64_t xr, const uchar* color, int pix_size) { uchar* hline_min_ptr = (uchar*)(ptr) + (xl)*(pix_size); uchar* hline_end_ptr = (uchar*)(ptr) + (xr+1)*(pix_size); @@ -1083,7 +1083,7 @@ static inline void ICV_HLINE_X(uchar* ptr, int xl, int xr, const uchar* color, i } //end ICV_HLINE_X() -static inline void ICV_HLINE(uchar* ptr, int xl, int xr, const void* color, int pix_size) +static inline void ICV_HLINE(uchar* ptr, int64_t xl, int64_t xr, const void* color, int pix_size) { ICV_HLINE_X(ptr, xl, xr, reinterpret_cast<const uchar*>(color), pix_size); } @@ -1177,7 +1177,7 @@ FillConvexPoly( Mat& img, const Point2l* v, int 
npts, const void* color, int lin edge[0].x = edge[1].x = -XY_ONE; edge[0].dx = edge[1].dx = 0; - ptr += img.step*y; + ptr += (int64_t)img.step*y; do { @@ -1206,7 +1206,7 @@ FillConvexPoly( Mat& img, const Point2l* v, int npts, const void* color, int lin } edge[i].ye = ty; - edge[i].dx = ((xe - xs)*2 + (ty - y)) / (2 * (ty - y)); + edge[i].dx = ((xe - xs)*2 + ((int64_t)ty - y)) / (2 * ((int64_t)ty - y)); edge[i].x = xs; edge[i].idx = idx; break; @@ -1480,7 +1480,7 @@ Circle( Mat& img, Point center, int radius, const void* color, int fill ) size_t step = img.step; int pix_size = (int)img.elemSize(); uchar* ptr = img.ptr(); - int err = 0, dx = radius, dy = 0, plus = 1, minus = (radius << 1) - 1; + int64_t err = 0, dx = radius, dy = 0, plus = 1, minus = (radius << 1) - 1; int inside = center.x >= radius && center.x < size.width - radius && center.y >= radius && center.y < size.height - radius; @@ -1490,8 +1490,8 @@ Circle( Mat& img, Point center, int radius, const void* color, int fill ) while( dx >= dy ) { int mask; - int y11 = center.y - dy, y12 = center.y + dy, y21 = center.y - dx, y22 = center.y + dx; - int x11 = center.x - dx, x12 = center.x + dx, x21 = center.x - dy, x22 = center.x + dy; + int64_t y11 = center.y - dy, y12 = center.y + dy, y21 = center.y - dx, y22 = center.y + dx; + int64_t x11 = center.x - dx, x12 = center.x + dx, x21 = center.x - dy, x22 = center.x + dy; if( inside ) { @@ -1531,7 +1531,7 @@ Circle( Mat& img, Point center, int radius, const void* color, int fill ) { if( fill ) { - x11 = std::max( x11, 0 ); + x11 = std::max( x11, (int64_t)0 ); x12 = MIN( x12, size.width - 1 ); } @@ -1569,7 +1569,7 @@ Circle( Mat& img, Point center, int radius, const void* color, int fill ) { if( fill ) { - x21 = std::max( x21, 0 ); + x21 = std::max( x21, (int64_t)0 ); x22 = MIN( x22, size.width - 1 ); } @@ -1866,6 +1866,12 @@ void rectangle( InputOutputArray img, Rect rec, { CV_INSTRUMENT_REGION(); + CV_Assert( 0 <= shift && shift <= XY_SHIFT ); + + // Crop the 
rectangle to right around the mat. + rec &= Rect(-(1 << shift), -(1 << shift), ((img.cols() + 2) << shift), + ((img.rows() + 2) << shift)); + if( !rec.empty() ) rectangle( img, rec.tl(), rec.br() - Point(1< ssize.width*2) { - row[(_dst.cols-1) + x] = row[dx + cn]; + row[(_dst.cols-1) * cn + x] = row[dx + cn]; } } diff --git a/modules/imgproc/test/test_pyramid.cpp b/modules/imgproc/test/test_pyramid.cpp new file mode 100644 index 0000000000..343d7a2321 --- /dev/null +++ b/modules/imgproc/test/test_pyramid.cpp @@ -0,0 +1,19 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "test_precomp.hpp" + +namespace opencv_test { namespace { + +TEST(Imgproc_PyrUp, pyrUp_regression_22184) +{ + Mat src(100, 100, CV_16UC3, Scalar::all(255)); + Mat dst(100 * 2 + 1, 100 * 2 + 1, CV_16UC3, Scalar::all(0)); + pyrUp(src, dst, Size(dst.cols, dst.rows)); + double min_val = 0; + minMaxLoc(dst, &min_val); + ASSERT_GT(cvRound(min_val), 0); +} + +}} // namespace diff --git a/modules/js/src/core_bindings.cpp b/modules/js/src/core_bindings.cpp index 70d1dc70b8..1f86711c16 100644 --- a/modules/js/src/core_bindings.cpp +++ b/modules/js/src/core_bindings.cpp @@ -453,6 +453,7 @@ namespace binding_utils EMSCRIPTEN_BINDINGS(binding_utils) { register_vector("IntVector"); + register_vector("CharVector"); register_vector("FloatVector"); register_vector("DoubleVector"); register_vector("PointVector"); diff --git a/modules/objdetect/test/test_precomp.hpp b/modules/objdetect/test/test_precomp.hpp index c53b277aa1..88b8e9a4f5 100644 --- a/modules/objdetect/test/test_precomp.hpp +++ b/modules/objdetect/test/test_precomp.hpp @@ -7,4 +7,10 @@ #include "opencv2/ts.hpp" #include "opencv2/objdetect.hpp" +#if defined CV_CXX11 + #include +#else + #include +#endif + #endif diff --git a/modules/objdetect/test/test_qrcode_encode.cpp 
b/modules/objdetect/test/test_qrcode_encode.cpp index fe4c9fc954..4ccdd828a2 100644 --- a/modules/objdetect/test/test_qrcode_encode.cpp +++ b/modules/objdetect/test/test_qrcode_encode.cpp @@ -5,6 +5,16 @@ #include "test_precomp.hpp" namespace opencv_test { namespace { +#if !defined CV_CXX11 +// Wrapper for generating seeded random number via std::rand. +template +class SeededRandFunctor { +public: + SeededRandFunctor() { std::srand(Seed); } + int operator()(int i) { return std::rand() % (i + 1); } +}; +#endif + std::string encode_qrcode_images_name[] = { "version1_mode1.png", "version1_mode2.png", "version1_mode4.png", "version2_mode1.png", "version2_mode2.png", "version2_mode4.png", @@ -380,8 +390,15 @@ TEST(Objdetect_QRCode_Encode_Decode_Structured_Append, DISABLED_regression) std::string symbol_set = config["symbols_set"]; std::string input_info = symbol_set; - std::random_shuffle(input_info.begin(), input_info.end()); - +#if defined CV_CXX11 + // std::random_shuffle is deprecated since C++11 and removed in C++17. + // Use manually constructed RNG with a fixed seed and std::shuffle instead. 
+ std::mt19937 rand_gen {1}; + std::shuffle(input_info.begin(), input_info.end(), rand_gen); +#else + SeededRandFunctor<1> rand_gen; + std::random_shuffle(input_info.begin(), input_info.end(), rand_gen); +#endif for (int j = min_stuctures_num; j < max_stuctures_num; j++) { QRCodeEncoder::Params params; diff --git a/modules/python/CMakeLists.txt b/modules/python/CMakeLists.txt index c6a9075224..93eab8c94d 100644 --- a/modules/python/CMakeLists.txt +++ b/modules/python/CMakeLists.txt @@ -35,7 +35,7 @@ add_subdirectory(python3) else() # standalone build -cmake_minimum_required(VERSION 2.8.12) +cmake_minimum_required(VERSION 2.8.12.2) project(OpenCVPython CXX C) include("./standalone.cmake") diff --git a/modules/python/package/cv2/__init__.py b/modules/python/package/cv2/__init__.py index 07d1e0d21e..550482bd17 100644 --- a/modules/python/package/cv2/__init__.py +++ b/modules/python/package/cv2/__init__.py @@ -89,7 +89,7 @@ def bootstrap(): BINARIES_PATHS = [] g_vars = globals() - l_vars = locals() + l_vars = locals().copy() if sys.version_info[:2] < (3, 0): from . 
load_config_py2 import exec_file_wrapper diff --git a/modules/videoio/src/ffmpeg_codecs.hpp b/modules/videoio/src/ffmpeg_codecs.hpp index 61788e0345..faad2596ed 100644 --- a/modules/videoio/src/ffmpeg_codecs.hpp +++ b/modules/videoio/src/ffmpeg_codecs.hpp @@ -60,6 +60,7 @@ extern "C" { #include #endif +#include #include #ifdef __cplusplus diff --git a/platforms/android/android.toolchain.cmake b/platforms/android/android.toolchain.cmake index 50b342c7a6..1dca060fdf 100644 --- a/platforms/android/android.toolchain.cmake +++ b/platforms/android/android.toolchain.cmake @@ -189,7 +189,7 @@ # # ------------------------------------------------------------------------------ -cmake_minimum_required( VERSION 2.6.3 ) +cmake_minimum_required( VERSION 2.8.12.2 ) if( DEFINED CMAKE_CROSSCOMPILING ) # subsequent toolchain loading is not really needed diff --git a/samples/cpp/imgcodecs_jpeg.cpp b/samples/cpp/imgcodecs_jpeg.cpp index b3abc49286..4c22a483d7 100644 --- a/samples/cpp/imgcodecs_jpeg.cpp +++ b/samples/cpp/imgcodecs_jpeg.cpp @@ -17,7 +17,7 @@ int main(int /*argc*/, const char** /* argv */ ) { const Point center( img.rows / 2 , img.cols /2 ); - for( int radius = 5; radius < img.rows ; radius += 3.5 ) + for( int radius = 5; radius < img.rows ; radius += 3 ) { cv::circle( img, center, radius, Scalar(255,0,255) ); } diff --git a/samples/hal/c_hal/CMakeLists.txt b/samples/hal/c_hal/CMakeLists.txt index 8502779e84..8cf78aa5ff 100644 --- a/samples/hal/c_hal/CMakeLists.txt +++ b/samples/hal/c_hal/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8.8 FATAL_ERROR) +cmake_minimum_required(VERSION 2.8.12.2 FATAL_ERROR) set(PROJECT_NAME "c_hal") set(HAL_LIB_NAME "c_hal") diff --git a/samples/hal/slow_hal/CMakeLists.txt b/samples/hal/slow_hal/CMakeLists.txt index d42fb0b6fd..1ffa4670b6 100644 --- a/samples/hal/slow_hal/CMakeLists.txt +++ b/samples/hal/slow_hal/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8.8 FATAL_ERROR) 
+cmake_minimum_required(VERSION 2.8.12.2 FATAL_ERROR) set(PROJECT_NAME "slow_hal") set(HAL_LIB_NAME "slow_hal") diff --git a/samples/openvx/CMakeLists.txt b/samples/openvx/CMakeLists.txt index 701184d369..fd04e6b9e2 100644 --- a/samples/openvx/CMakeLists.txt +++ b/samples/openvx/CMakeLists.txt @@ -1,6 +1,6 @@ ocv_install_example_src(cpp *.cpp *.hpp CMakeLists.txt) -cmake_minimum_required(VERSION 2.8.9) +cmake_minimum_required(VERSION 2.8.12.2) set(OPENCV_OPENVX_SAMPLE_REQUIRED_DEPS opencv_core