diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index 8da28d275f..a84d7fc3ad 100644 --- a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -112,6 +112,10 @@ ocv_target_link_libraries(${the_module} PRIVATE "${OPENCV_HAL_LINKER_LIBS}" ) +if(OPENCV_CORE_EXCLUDE_C_API) + ocv_target_compile_definitions(${the_module} PRIVATE "OPENCV_EXCLUDE_C_API=1") +endif() + ocv_add_accuracy_tests() ocv_add_perf_tests() diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 760bbcb088..41b281c8de 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -57,24 +57,6 @@ namespace cv * logical operations * \****************************************************************************************/ -void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize ) -{ - int scn = (int)sc.total(), cn = CV_MAT_CN(buftype); - size_t esz = CV_ELEM_SIZE(buftype); - getConvertFunc(sc.depth(), buftype)(sc.ptr(), 1, 0, 1, scbuf, 1, Size(std::min(cn, scn), 1), 0); - // unroll the scalar - if( scn < cn ) - { - CV_Assert( scn == 1 ); - size_t esz1 = CV_ELEM_SIZE1(buftype); - for( size_t i = esz1; i < esz; i++ ) - scbuf[i] = scbuf[i - esz1]; - } - for( size_t i = esz; i < blocksize*esz; i++ ) - scbuf[i] = scbuf[i - esz]; -} - - enum { OCL_OP_ADD=0, OCL_OP_SUB=1, OCL_OP_RSUB=2, OCL_OP_ABSDIFF=3, OCL_OP_MUL=4, OCL_OP_MUL_SCALE=5, OCL_OP_DIV_SCALE=6, OCL_OP_RECIP_SCALE=7, OCL_OP_ADDW=8, OCL_OP_AND=9, OCL_OP_OR=10, OCL_OP_XOR=11, OCL_OP_NOT=12, OCL_OP_MIN=13, OCL_OP_MAX=14, @@ -1041,9 +1023,7 @@ static BinaryFuncC* getRecipTab() return recipTab; } -} - -void cv::multiply(InputArray src1, InputArray src2, +void multiply(InputArray src1, InputArray src2, OutputArray dst, double scale, int dtype) { CV_INSTRUMENT_REGION(); @@ -1052,7 +1032,7 @@ void cv::multiply(InputArray src1, InputArray src2, true, &scale, std::abs(scale - 1.0) < DBL_EPSILON ? OCL_OP_MUL : OCL_OP_MUL_SCALE); } -void cv::divide(InputArray src1, InputArray src2, +void divide(InputArray src1, InputArray src2, OutputArray dst, double scale, int dtype) { CV_INSTRUMENT_REGION(); @@ -1060,7 +1040,7 @@ void cv::divide(InputArray src1, InputArray src2, arithm_op(src1, src2, dst, noArray(), dtype, getDivTab(), true, &scale, OCL_OP_DIV_SCALE); } -void cv::divide(double scale, InputArray src2, +void divide(double scale, InputArray src2, OutputArray dst, int dtype) { CV_INSTRUMENT_REGION(); @@ -1068,13 +1048,17 @@ void cv::divide(double scale, InputArray src2, arithm_op(src2, src2, dst, noArray(), dtype, getRecipTab(), true, &scale, OCL_OP_RECIP_SCALE); } +UMat UMat::mul(InputArray m, double scale) const +{ + UMat dst; + multiply(*this, m, dst, scale); + return dst; +} + /****************************************************************************************\ * addWeighted * \****************************************************************************************/ -namespace cv -{ - static BinaryFuncC* getAddWeightedTab() { static BinaryFuncC addWeightedTab[] = @@ -1879,6 +1863,9 @@ void cv::inRange(InputArray _src, InputArray _lowerb, } } + +#ifndef OPENCV_EXCLUDE_C_API + /****************************************************************************************\ * Earlier API: cvAdd etc. * \****************************************************************************************/ @@ -2141,4 +2128,5 @@ cvMaxS( const void* srcarr1, double value, void* dstarr ) cv::max( src1, value, dst ); } +#endif // OPENCV_EXCLUDE_C_API /* End of file. */ diff --git a/modules/core/src/array.cpp b/modules/core/src/array.cpp index f2a79b5a69..1a5ea0100f 100644 --- a/modules/core/src/array.cpp +++ b/modules/core/src/array.cpp @@ -48,6 +48,8 @@ #include "precomp.hpp" +#ifndef OPENCV_EXCLUDE_C_API + #define CV_ORIGIN_TL 0 #define CV_ORIGIN_BL 1 @@ -3223,51 +3225,50 @@ template<> void DefaultDeleter::operator ()(CvMemStorage* obj) con template<> void DefaultDeleter::operator ()(CvFileStorage* obj) const { cvReleaseFileStorage(&obj); } -template static inline -void scalarToRawData_(const Scalar& s, T * const buf, const int cn, const int unroll_to) -{ - int i = 0; - for(; i < cn; i++) - buf[i] = saturate_cast(s.val[i]); - for(; i < unroll_to; i++) - buf[i] = buf[i-cn]; -} +} // cv:: -void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to) + +/* universal functions */ +CV_IMPL void +cvRelease( void** struct_ptr ) { - CV_INSTRUMENT_REGION(); + CvTypeInfo* info; - const int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); - CV_Assert(cn <= 4); - switch(depth) + if( !struct_ptr ) + CV_Error( CV_StsNullPtr, "NULL double pointer" ); + + if( *struct_ptr ) { - case CV_8U: - scalarToRawData_(s, (uchar*)_buf, cn, unroll_to); - break; - case CV_8S: - scalarToRawData_(s, (schar*)_buf, cn, unroll_to); - break; - case CV_16U: - scalarToRawData_(s, (ushort*)_buf, cn, unroll_to); - break; - case CV_16S: - scalarToRawData_(s, (short*)_buf, cn, unroll_to); - break; - case CV_32S: - scalarToRawData_(s, (int*)_buf, cn, unroll_to); - break; - case CV_32F: - scalarToRawData_(s, (float*)_buf, cn, unroll_to); - break; - case CV_64F: - scalarToRawData_(s, (double*)_buf, cn, unroll_to); - break; - default: - CV_Error(CV_StsUnsupportedFormat,""); + info = cvTypeOf( *struct_ptr ); + if( !info ) + CV_Error( CV_StsError, "Unknown object type" ); + if( !info->release ) + CV_Error( CV_StsError, "release function pointer is NULL" ); + + info->release( struct_ptr ); + *struct_ptr = 0; } } -} // cv:: + +void* cvClone( const void* struct_ptr ) +{ + void* struct_copy = 0; + CvTypeInfo* info; + + if( !struct_ptr ) + CV_Error( CV_StsNullPtr, "NULL structure pointer" ); + + info = cvTypeOf( struct_ptr ); + if( !info ) + CV_Error( CV_StsError, "Unknown object type" ); + if( !info->clone ) + CV_Error( CV_StsError, "clone function pointer is NULL" ); + + struct_copy = info->clone( struct_ptr ); + return struct_copy; +} +#endif // OPENCV_EXCLUDE_C_API /* End of file. */ diff --git a/modules/core/src/convert_c.cpp b/modules/core/src/convert_c.cpp index efe4de740a..96beffccc6 100644 --- a/modules/core/src/convert_c.cpp +++ b/modules/core/src/convert_c.cpp @@ -5,6 +5,7 @@ #include "precomp.hpp" +#ifndef OPENCV_EXCLUDE_C_API CV_IMPL void cvSplit( const void* srcarr, void* dstarr0, void* dstarr1, void* dstarr2, void* dstarr3 ) @@ -132,3 +133,5 @@ CV_IMPL void cvNormalize( const CvArr* srcarr, CvArr* dstarr, CV_Assert( dst.size() == src.size() && src.channels() == dst.channels() ); cv::normalize( src, dst, a, b, norm_type, dst.type(), mask ); } + +#endif // OPENCV_EXCLUDE_C_API diff --git a/modules/core/src/convert_scale.dispatch.cpp b/modules/core/src/convert_scale.dispatch.cpp index 83376aa61d..6902ecc24b 100644 --- a/modules/core/src/convert_scale.dispatch.cpp +++ b/modules/core/src/convert_scale.dispatch.cpp @@ -9,7 +9,6 @@ #include "convert_scale.simd.hpp" #include "convert_scale.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content - namespace cv { @@ -117,143 +116,4 @@ void convertScaleAbs(InputArray _src, OutputArray _dst, double alpha, double bet } } -//================================================================================================== - -#ifdef HAVE_OPENCL - -static bool ocl_normalize( InputArray _src, InputOutputArray _dst, InputArray _mask, int dtype, - double scale, double delta ) -{ - UMat src = _src.getUMat(); - - if( _mask.empty() ) - src.convertTo( _dst, dtype, scale, delta ); - else if (src.channels() <= 4) - { - const ocl::Device & dev = ocl::Device::getDefault(); - - int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), - ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32F, std::max(sdepth, ddepth)), - rowsPerWI = dev.isIntel() ? 4 : 1; - - float fscale = static_cast(scale), fdelta = static_cast(delta); - bool haveScale = std::fabs(scale - 1) > DBL_EPSILON, - haveZeroScale = !(std::fabs(scale) > DBL_EPSILON), - haveDelta = std::fabs(delta) > DBL_EPSILON, - doubleSupport = dev.doubleFPConfig() > 0; - - if (!haveScale && !haveDelta && stype == dtype) - { - _src.copyTo(_dst, _mask); - return true; - } - if (haveZeroScale) - { - _dst.setTo(Scalar(delta), _mask); - return true; - } - - if ((sdepth == CV_64F || ddepth == CV_64F) && !doubleSupport) - return false; - - char cvt[2][40]; - String opts = format("-D srcT=%s -D dstT=%s -D convertToWT=%s -D cn=%d -D rowsPerWI=%d" - " -D convertToDT=%s -D workT=%s%s%s%s -D srcT1=%s -D dstT1=%s", - ocl::typeToStr(stype), ocl::typeToStr(dtype), - ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), cn, - rowsPerWI, ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]), - ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), - doubleSupport ? " -D DOUBLE_SUPPORT" : "", - haveScale ? " -D HAVE_SCALE" : "", - haveDelta ? " -D HAVE_DELTA" : "", - ocl::typeToStr(sdepth), ocl::typeToStr(ddepth)); - - ocl::Kernel k("normalizek", ocl::core::normalize_oclsrc, opts); - if (k.empty()) - return false; - - UMat mask = _mask.getUMat(), dst = _dst.getUMat(); - - ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), - maskarg = ocl::KernelArg::ReadOnlyNoSize(mask), - dstarg = ocl::KernelArg::ReadWrite(dst); - - if (haveScale) - { - if (haveDelta) - k.args(srcarg, maskarg, dstarg, fscale, fdelta); - else - k.args(srcarg, maskarg, dstarg, fscale); - } - else - { - if (haveDelta) - k.args(srcarg, maskarg, dstarg, fdelta); - else - k.args(srcarg, maskarg, dstarg); - } - - size_t globalsize[2] = { (size_t)src.cols, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI }; - return k.run(2, globalsize, NULL, false); - } - else - { - UMat temp; - src.convertTo( temp, dtype, scale, delta ); - temp.copyTo( _dst, _mask ); - } - - return true; -} - -#endif - -void normalize(InputArray _src, InputOutputArray _dst, double a, double b, - int norm_type, int rtype, InputArray _mask) -{ - CV_INSTRUMENT_REGION(); - - double scale = 1, shift = 0; - int type = _src.type(), depth = CV_MAT_DEPTH(type); - - if( rtype < 0 ) - rtype = _dst.fixedType() ? _dst.depth() : depth; - - if( norm_type == CV_MINMAX ) - { - double smin = 0, smax = 0; - double dmin = MIN( a, b ), dmax = MAX( a, b ); - minMaxIdx( _src, &smin, &smax, 0, 0, _mask ); - scale = (dmax - dmin)*(smax - smin > DBL_EPSILON ? 1./(smax - smin) : 0); - if( rtype == CV_32F ) - { - scale = (float)scale; - shift = (float)dmin - (float)(smin*scale); - } - else - shift = dmin - smin*scale; - } - else if( norm_type == CV_L2 || norm_type == CV_L1 || norm_type == CV_C ) - { - scale = norm( _src, norm_type, _mask ); - scale = scale > DBL_EPSILON ? a/scale : 0.; - shift = 0; - } - else - CV_Error( CV_StsBadArg, "Unknown/unsupported norm type" ); - - CV_OCL_RUN(_dst.isUMat(), - ocl_normalize(_src, _dst, _mask, rtype, scale, shift)) - - Mat src = _src.getMat(); - if( _mask.empty() ) - src.convertTo( _dst, rtype, scale, shift ); - else - { - Mat temp; - src.convertTo( temp, rtype, scale, shift ); - temp.copyTo( _dst, _mask ); - } -} - } // namespace diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index 798fde74d4..5262eb1b9c 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -53,6 +53,75 @@ namespace cv { +template static inline +void scalarToRawData_(const Scalar& s, T * const buf, const int cn, const int unroll_to) +{ + int i = 0; + for(; i < cn; i++) + buf[i] = saturate_cast(s.val[i]); + for(; i < unroll_to; i++) + buf[i] = buf[i-cn]; +} + +void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to) +{ + CV_INSTRUMENT_REGION(); + + const int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + CV_Assert(cn <= 4); + switch(depth) + { + case CV_8U: + scalarToRawData_(s, (uchar*)_buf, cn, unroll_to); + break; + case CV_8S: + scalarToRawData_(s, (schar*)_buf, cn, unroll_to); + break; + case CV_16U: + scalarToRawData_(s, (ushort*)_buf, cn, unroll_to); + break; + case CV_16S: + scalarToRawData_(s, (short*)_buf, cn, unroll_to); + break; + case CV_32S: + scalarToRawData_(s, (int*)_buf, cn, unroll_to); + break; + case CV_32F: + scalarToRawData_(s, (float*)_buf, cn, unroll_to); + break; + case CV_64F: + scalarToRawData_(s, (double*)_buf, cn, unroll_to); + break; +#if CV_VERSION_MAJOR >= 4 + case CV_16F: + scalarToRawData_(s, (float16_t*)_buf, cn, unroll_to); + break; +#endif + default: + CV_Error(CV_StsUnsupportedFormat,""); + } +} + +void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize ) +{ + int scn = (int)sc.total(), cn = CV_MAT_CN(buftype); + size_t esz = CV_ELEM_SIZE(buftype); + BinaryFunc cvtFn = getConvertFunc(sc.depth(), buftype); + CV_Assert(cvtFn); + cvtFn(sc.ptr(), 1, 0, 1, scbuf, 1, Size(std::min(cn, scn), 1), 0); + // unroll the scalar + if( scn < cn ) + { + CV_Assert( scn == 1 ); + size_t esz1 = CV_ELEM_SIZE1(buftype); + for( size_t i = esz1; i < esz; i++ ) + scbuf[i] = scbuf[i - esz1]; + } + for( size_t i = esz; i < blocksize*esz; i++ ) + scbuf[i] = scbuf[i - esz]; +} + + template static void copyMask_(const uchar* _src, size_t sstep, const uchar* mask, size_t mstep, uchar* _dst, size_t dstep, Size size) { @@ -594,490 +663,6 @@ Mat& Mat::setTo(InputArray _value, InputArray _mask) return *this; } -#if CV_SIMD128 -template CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) -{ - typedef typename V::lane_type T; - int end = (int)(size.width*esz); - int width = (end + 1)/2; - int width_1 = width & -v_uint8x16::nlanes; - int i, j; - -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(src, dst)); -#endif - - for( ; size.height--; src += sstep, dst += dstep ) - { - for( i = 0, j = end; i < width_1; i += v_uint8x16::nlanes, j -= v_uint8x16::nlanes ) - { - V t0, t1; - - t0 = v_load((T*)((uchar*)src + i)); - t1 = v_load((T*)((uchar*)src + j - v_uint8x16::nlanes)); - t0 = v_reverse(t0); - t1 = v_reverse(t1); - v_store((T*)(dst + j - v_uint8x16::nlanes), t0); - v_store((T*)(dst + i), t1); - } - if (isAligned(src, dst)) - { - for ( ; i < width; i += sizeof(T), j -= sizeof(T) ) - { - T t0, t1; - - t0 = *((T*)((uchar*)src + i)); - t1 = *((T*)((uchar*)src + j - sizeof(T))); - *((T*)(dst + j - sizeof(T))) = t0; - *((T*)(dst + i)) = t1; - } - } - else - { - for ( ; i < width; i += sizeof(T), j -= sizeof(T) ) - { - for (int k = 0; k < (int)sizeof(T); k++) - { - uchar t0, t1; - - t0 = *((uchar*)src + i + k); - t1 = *((uchar*)src + j + k - sizeof(T)); - *(dst + j + k - sizeof(T)) = t0; - *(dst + i + k) = t1; - } - } - } - } -} - -template CV_ALWAYS_INLINE void flipHoriz_double( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) -{ - int end = (int)(size.width*esz); - int width = (end + 1)/2; - -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(src, dst)); - CV_Assert(isAligned(src, dst)); -#endif - - for( ; size.height--; src += sstep, dst += dstep ) - { - for ( int i = 0, j = end; i < width; i += sizeof(T1) + sizeof(T2), j -= sizeof(T1) + sizeof(T2) ) - { - T1 t0, t1; - T2 t2, t3; - - t0 = *((T1*)((uchar*)src + i)); - t2 = *((T2*)((uchar*)src + i + sizeof(T1))); - t1 = *((T1*)((uchar*)src + j - sizeof(T1) - sizeof(T2))); - t3 = *((T2*)((uchar*)src + j - sizeof(T2))); - *((T1*)(dst + j - sizeof(T1) - sizeof(T2))) = t0; - *((T2*)(dst + j - sizeof(T2))) = t2; - *((T1*)(dst + i)) = t1; - *((T2*)(dst + i + sizeof(T1))) = t3; - } - } -} -#endif - -static void -flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) -{ -#if CV_SIMD -#if CV_STRONG_ALIGNMENT - size_t alignmentMark = ((size_t)src)|((size_t)dst)|sstep|dstep; -#endif - if (esz == 2 * v_uint8x16::nlanes) - { - int end = (int)(size.width*esz); - int width = end/2; - - for( ; size.height--; src += sstep, dst += dstep ) - { - for( int i = 0, j = end - 2 * v_uint8x16::nlanes; i < width; i += 2 * v_uint8x16::nlanes, j -= 2 * v_uint8x16::nlanes ) - { -#if CV_SIMD256 - v_uint8x32 t0, t1; - - t0 = v256_load((uchar*)src + i); - t1 = v256_load((uchar*)src + j); - v_store(dst + j, t0); - v_store(dst + i, t1); -#else - v_uint8x16 t0, t1, t2, t3; - - t0 = v_load((uchar*)src + i); - t1 = v_load((uchar*)src + i + v_uint8x16::nlanes); - t2 = v_load((uchar*)src + j); - t3 = v_load((uchar*)src + j + v_uint8x16::nlanes); - v_store(dst + j, t0); - v_store(dst + j + v_uint8x16::nlanes, t1); - v_store(dst + i, t2); - v_store(dst + i + v_uint8x16::nlanes, t3); -#endif - } - } - } - else if (esz == v_uint8x16::nlanes) - { - int end = (int)(size.width*esz); - int width = end/2; - - for( ; size.height--; src += sstep, dst += dstep ) - { - for( int i = 0, j = end - v_uint8x16::nlanes; i < width; i += v_uint8x16::nlanes, j -= v_uint8x16::nlanes ) - { - v_uint8x16 t0, t1; - - t0 = v_load((uchar*)src + i); - t1 = v_load((uchar*)src + j); - v_store(dst + j, t0); - v_store(dst + i, t1); - } - } - } - else if (esz == 8 -#if CV_STRONG_ALIGNMENT - && isAligned(alignmentMark) -#endif - ) - { - flipHoriz_single(src, sstep, dst, dstep, size, esz); - } - else if (esz == 4 -#if CV_STRONG_ALIGNMENT - && isAligned(alignmentMark) -#endif - ) - { - flipHoriz_single(src, sstep, dst, dstep, size, esz); - } - else if (esz == 2 -#if CV_STRONG_ALIGNMENT - && isAligned(alignmentMark) -#endif - ) - { - flipHoriz_single(src, sstep, dst, dstep, size, esz); - } - else if (esz == 1) - { - flipHoriz_single(src, sstep, dst, dstep, size, esz); - } - else if (esz == 24 -#if CV_STRONG_ALIGNMENT - && isAligned(alignmentMark) -#endif - ) - { - int end = (int)(size.width*esz); - int width = (end + 1)/2; - - for( ; size.height--; src += sstep, dst += dstep ) - { - for ( int i = 0, j = end; i < width; i += v_uint8x16::nlanes + sizeof(uint64_t), j -= v_uint8x16::nlanes + sizeof(uint64_t) ) - { - v_uint8x16 t0, t1; - uint64_t t2, t3; - - t0 = v_load((uchar*)src + i); - t2 = *((uint64_t*)((uchar*)src + i + v_uint8x16::nlanes)); - t1 = v_load((uchar*)src + j - v_uint8x16::nlanes - sizeof(uint64_t)); - t3 = *((uint64_t*)((uchar*)src + j - sizeof(uint64_t))); - v_store(dst + j - v_uint8x16::nlanes - sizeof(uint64_t), t0); - *((uint64_t*)(dst + j - sizeof(uint64_t))) = t2; - v_store(dst + i, t1); - *((uint64_t*)(dst + i + v_uint8x16::nlanes)) = t3; - } - } - } -#if !CV_STRONG_ALIGNMENT - else if (esz == 12) - { - flipHoriz_double(src, sstep, dst, dstep, size, esz); - } - else if (esz == 6) - { - flipHoriz_double(src, sstep, dst, dstep, size, esz); - } - else if (esz == 3) - { - flipHoriz_double(src, sstep, dst, dstep, size, esz); - } -#endif - else -#endif // CV_SIMD - { - int i, j, limit = (int)(((size.width + 1)/2)*esz); - AutoBuffer _tab(size.width*esz); - int* tab = _tab.data(); - - for( i = 0; i < size.width; i++ ) - for( size_t k = 0; k < esz; k++ ) - tab[i*esz + k] = (int)((size.width - i - 1)*esz + k); - - for( ; size.height--; src += sstep, dst += dstep ) - { - for( i = 0; i < limit; i++ ) - { - j = tab[i]; - uchar t0 = src[i], t1 = src[j]; - dst[i] = t1; dst[j] = t0; - } - } - } -} - -static void -flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size, size_t esz ) -{ - const uchar* src1 = src0 + (size.height - 1)*sstep; - uchar* dst1 = dst0 + (size.height - 1)*dstep; - size.width *= (int)esz; - - for( int y = 0; y < (size.height + 1)/2; y++, src0 += sstep, src1 -= sstep, - dst0 += dstep, dst1 -= dstep ) - { - int i = 0; -#if CV_SIMD -#if CV_STRONG_ALIGNMENT - if (isAligned(src0, src1, dst0, dst1)) -#endif - { - for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH) - { - v_int32 t0 = vx_load((int*)(src0 + i)); - v_int32 t1 = vx_load((int*)(src1 + i)); - vx_store((int*)(dst0 + i), t1); - vx_store((int*)(dst1 + i), t0); - } - } -#if CV_STRONG_ALIGNMENT - else - { - for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH) - { - v_uint8 t0 = vx_load(src0 + i); - v_uint8 t1 = vx_load(src1 + i); - vx_store(dst0 + i, t1); - vx_store(dst1 + i, t0); - } - } -#endif -#endif - - if (isAligned(src0, src1, dst0, dst1)) - { - for( ; i <= size.width - 16; i += 16 ) - { - int t0 = ((int*)(src0 + i))[0]; - int t1 = ((int*)(src1 + i))[0]; - - ((int*)(dst0 + i))[0] = t1; - ((int*)(dst1 + i))[0] = t0; - - t0 = ((int*)(src0 + i))[1]; - t1 = ((int*)(src1 + i))[1]; - - ((int*)(dst0 + i))[1] = t1; - ((int*)(dst1 + i))[1] = t0; - - t0 = ((int*)(src0 + i))[2]; - t1 = ((int*)(src1 + i))[2]; - - ((int*)(dst0 + i))[2] = t1; - ((int*)(dst1 + i))[2] = t0; - - t0 = ((int*)(src0 + i))[3]; - t1 = ((int*)(src1 + i))[3]; - - ((int*)(dst0 + i))[3] = t1; - ((int*)(dst1 + i))[3] = t0; - } - - for( ; i <= size.width - 4; i += 4 ) - { - int t0 = ((int*)(src0 + i))[0]; - int t1 = ((int*)(src1 + i))[0]; - - ((int*)(dst0 + i))[0] = t1; - ((int*)(dst1 + i))[0] = t0; - } - } - - for( ; i < size.width; i++ ) - { - uchar t0 = src0[i]; - uchar t1 = src1[i]; - - dst0[i] = t1; - dst1[i] = t0; - } - } -} - -#ifdef HAVE_OPENCL - -enum { FLIP_COLS = 1 << 0, FLIP_ROWS = 1 << 1, FLIP_BOTH = FLIP_ROWS | FLIP_COLS }; - -static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode ) -{ - CV_Assert(flipCode >= -1 && flipCode <= 1); - - const ocl::Device & dev = ocl::Device::getDefault(); - int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), - flipType, kercn = std::min(ocl::predictOptimalVectorWidth(_src, _dst), 4); - - bool doubleSupport = dev.doubleFPConfig() > 0; - if (!doubleSupport && depth == CV_64F) - kercn = cn; - - if (cn > 4) - return false; - - const char * kernelName; - if (flipCode == 0) - kernelName = "arithm_flip_rows", flipType = FLIP_ROWS; - else if (flipCode > 0) - kernelName = "arithm_flip_cols", flipType = FLIP_COLS; - else - kernelName = "arithm_flip_rows_cols", flipType = FLIP_BOTH; - - int pxPerWIy = (dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1; - kercn = (cn!=3 || flipType == FLIP_ROWS) ? std::max(kercn, cn) : cn; - - ocl::Kernel k(kernelName, ocl::core::flip_oclsrc, - format( "-D T=%s -D T1=%s -D DEPTH=%d -D cn=%d -D PIX_PER_WI_Y=%d -D kercn=%d", - kercn != cn ? ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)) : ocl::vecopTypeToStr(CV_MAKE_TYPE(depth, kercn)), - kercn != cn ? ocl::typeToStr(depth) : ocl::vecopTypeToStr(depth), depth, cn, pxPerWIy, kercn)); - if (k.empty()) - return false; - - Size size = _src.size(); - _dst.create(size, type); - UMat src = _src.getUMat(), dst = _dst.getUMat(); - - int cols = size.width * cn / kercn, rows = size.height; - cols = flipType == FLIP_COLS ? (cols + 1) >> 1 : cols; - rows = flipType & FLIP_ROWS ? (rows + 1) >> 1 : rows; - - k.args(ocl::KernelArg::ReadOnlyNoSize(src), - ocl::KernelArg::WriteOnly(dst, cn, kercn), rows, cols); - - size_t maxWorkGroupSize = dev.maxWorkGroupSize(); - CV_Assert(maxWorkGroupSize % 4 == 0); - - size_t globalsize[2] = { (size_t)cols, ((size_t)rows + pxPerWIy - 1) / pxPerWIy }, - localsize[2] = { maxWorkGroupSize / 4, 4 }; - return k.run(2, globalsize, (flipType == FLIP_COLS) && !dev.isIntel() ? localsize : NULL, false); -} - -#endif - -#if defined HAVE_IPP -static bool ipp_flip(Mat &src, Mat &dst, int flip_mode) -{ -#ifdef HAVE_IPP_IW - CV_INSTRUMENT_REGION_IPP(); - - // Details: https://github.com/opencv/opencv/issues/12943 - if (flip_mode <= 0 /* swap rows */ - && cv::ipp::getIppTopFeatures() != ippCPUID_SSE42 - && (int64_t)(src.total()) * src.elemSize() >= CV_BIG_INT(0x80000000)/*2Gb*/ - ) - return false; - - IppiAxis ippMode; - if(flip_mode < 0) - ippMode = ippAxsBoth; - else if(flip_mode == 0) - ippMode = ippAxsHorizontal; - else - ippMode = ippAxsVertical; - - try - { - ::ipp::IwiImage iwSrc = ippiGetImage(src); - ::ipp::IwiImage iwDst = ippiGetImage(dst); - - CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, iwSrc, iwDst, ippMode); - } - catch(const ::ipp::IwException &) - { - return false; - } - - return true; -#else - CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(flip_mode); - return false; -#endif -} -#endif - - -void flip( InputArray _src, OutputArray _dst, int flip_mode ) -{ - CV_INSTRUMENT_REGION(); - - CV_Assert( _src.dims() <= 2 ); - Size size = _src.size(); - - if (flip_mode < 0) - { - if (size.width == 1) - flip_mode = 0; - if (size.height == 1) - flip_mode = 1; - } - - if ((size.width == 1 && flip_mode > 0) || - (size.height == 1 && flip_mode == 0)) - { - return _src.copyTo(_dst); - } - - CV_OCL_RUN( _dst.isUMat(), ocl_flip(_src, _dst, flip_mode)) - - Mat src = _src.getMat(); - int type = src.type(); - _dst.create( size, type ); - Mat dst = _dst.getMat(); - - CV_IPP_RUN_FAST(ipp_flip(src, dst, flip_mode)); - - size_t esz = CV_ELEM_SIZE(type); - - if( flip_mode <= 0 ) - flipVert( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz ); - else - flipHoriz( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz ); - - if( flip_mode < 0 ) - flipHoriz( dst.ptr(), dst.step, dst.ptr(), dst.step, dst.size(), esz ); -} - -void rotate(InputArray _src, OutputArray _dst, int rotateMode) -{ - CV_Assert(_src.dims() <= 2); - - switch (rotateMode) - { - case ROTATE_90_CLOCKWISE: - transpose(_src, _dst); - flip(_dst, _dst, 1); - break; - case ROTATE_180: - flip(_src, _dst, -1); - break; - case ROTATE_90_COUNTERCLOCKWISE: - transpose(_src, _dst); - flip(_dst, _dst, 0); - break; - default: - break; - } -} #if defined HAVE_OPENCL && !defined __APPLE__ @@ -1499,6 +1084,9 @@ void cv::copyMakeBorder( InputArray _src, OutputArray _dst, int top, int bottom, } } + +#ifndef OPENCV_EXCLUDE_C_API + /* dst = src */ CV_IMPL void cvCopy( const void* srcarr, void* dstarr, const void* maskarr ) @@ -1614,4 +1202,5 @@ cvRepeat( const CvArr* srcarr, CvArr* dstarr ) cv::repeat(src, dst.rows/src.rows, dst.cols/src.cols, dst); } +#endif // OPENCV_EXCLUDE_C_API /* End of file. */ diff --git a/modules/core/src/datastructs.cpp b/modules/core/src/datastructs.cpp index 61adf3493e..cd9196a130 100644 --- a/modules/core/src/datastructs.cpp +++ b/modules/core/src/datastructs.cpp @@ -40,6 +40,8 @@ //M*/ #include "precomp.hpp" +#ifndef OPENCV_EXCLUDE_C_API + /* default alignment for dynamic data strucutures, resided in storages. */ #define CV_STRUCT_ALIGN ((int)sizeof(double)) @@ -3585,4 +3587,5 @@ void seqInsertSlice( CvSeq* seq, int before_index, const CvArr* from_arr ) } +#endif // OPENCV_EXCLUDE_C_API /* End of file. */ diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index b307703a32..e378f31e66 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -4640,6 +4640,9 @@ int cv::getOptimalDFTSize( int size0 ) return optimalDFTSizeTab[b]; } + +#ifndef OPENCV_EXCLUDE_C_API + CV_IMPL void cvDFT( const CvArr* srcarr, CvArr* dstarr, int flags, int nonzero_rows ) { @@ -4695,4 +4698,5 @@ cvGetOptimalDFTSize( int size0 ) return cv::getOptimalDFTSize(size0); } +#endif // OPENCV_EXCLUDE_C_API /* End of file. */ diff --git a/modules/core/src/lapack.cpp b/modules/core/src/lapack.cpp index 486b7a5aba..9bca6a8211 100644 --- a/modules/core/src/lapack.cpp +++ b/modules/core/src/lapack.cpp @@ -753,8 +753,6 @@ SVBkSb( int m, int n, const double* w, size_t wstep, (double*)alignPtr(buffer, sizeof(double)), DBL_EPSILON*2 ); } -} - /****************************************************************************************\ * Determinant of the matrix * \****************************************************************************************/ @@ -764,7 +762,7 @@ SVBkSb( int m, int n, const double* w, size_t wstep, m(0,1)*((double)m(1,0)*m(2,2) - (double)m(1,2)*m(2,0)) + \ m(0,2)*((double)m(1,0)*m(2,1) - (double)m(1,1)*m(2,0))) -double cv::determinant( InputArray _mat ) +double determinant( InputArray _mat ) { CV_INSTRUMENT_REGION(); @@ -842,7 +840,7 @@ double cv::determinant( InputArray _mat ) #define Df( y, x ) ((float*)(dstdata + y*dststep))[x] #define Dd( y, x ) ((double*)(dstdata + y*dststep))[x] -double cv::invert( InputArray _src, OutputArray _dst, int method ) +double invert( InputArray _src, OutputArray _dst, int method ) { CV_INSTRUMENT_REGION(); @@ -1069,13 +1067,19 @@ double cv::invert( InputArray _src, OutputArray _dst, int method ) return result; } +UMat UMat::inv(int method) const +{ + UMat m; + invert(*this, m, method); + return m; +} /****************************************************************************************\ * Solving a linear system * \****************************************************************************************/ -bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int method ) +bool solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int method ) { CV_INSTRUMENT_REGION(); @@ -1374,7 +1378,7 @@ bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int meth /////////////////// finding eigenvalues and eigenvectors of a symmetric matrix /////////////// -bool cv::eigen( InputArray _src, OutputArray _evals, OutputArray _evects ) +bool eigen( InputArray _src, OutputArray _evals, OutputArray _evects ) { CV_INSTRUMENT_REGION(); @@ -1396,7 +1400,7 @@ bool cv::eigen( InputArray _src, OutputArray _evals, OutputArray _evects ) const bool evecNeeded = _evects.needed(); const int esOptions = evecNeeded ? Eigen::ComputeEigenvectors : Eigen::EigenvaluesOnly; _evals.create(n, 1, type); - cv::Mat evals = _evals.getMat(); + Mat evals = _evals.getMat(); if ( type == CV_64F ) { Eigen::MatrixXd src_eig, zeros_eig; @@ -1448,9 +1452,6 @@ bool cv::eigen( InputArray _src, OutputArray _evals, OutputArray _evects ) #endif } -namespace cv -{ - static void _SVDcompute( InputArray _aarr, OutputArray _w, OutputArray _u, OutputArray _vt, int flags ) { @@ -1598,6 +1599,9 @@ void cv::SVBackSubst(InputArray w, InputArray u, InputArray vt, InputArray rhs, } + +#ifndef OPENCV_EXCLUDE_C_API + CV_IMPL double cvDet( const CvArr* arr ) { @@ -1789,3 +1793,4 @@ cvSVBkSb( const CvArr* warr, const CvArr* uarr, cv::SVD::backSubst(w, u, v, rhs, dst); CV_Assert( dst.data == dst0.data ); } +#endif // OPENCV_EXCLUDE_C_API diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index a4e5263aa8..9fdf7d7702 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -1637,6 +1637,9 @@ void patchNaNs( InputOutputArray _a, double _val ) } + +#ifndef OPENCV_EXCLUDE_C_API + CV_IMPL float cvCbrt(float value) { return cv::cubeRoot(value); } CV_IMPL float cvFastArctan(float y, float x) { return cv::fastAtan2(y, x); } @@ -1720,6 +1723,7 @@ CV_IMPL int cvCheckArr( const CvArr* arr, int flags, return cv::checkRange(cv::cvarrToMat(arr), (flags & CV_CHECK_QUIET) != 0, 0, minVal, maxVal ); } +#endif // OPENCV_EXCLUDE_C_API /* Finds real roots of cubic, quadratic or linear equation. @@ -2015,6 +2019,8 @@ double cv::solvePoly( InputArray _coeffs0, OutputArray _roots0, int maxIters ) } +#ifndef OPENCV_EXCLUDE_C_API + CV_IMPL int cvSolveCubic( const CvMat* coeffs, CvMat* roots ) { @@ -2034,6 +2040,7 @@ void cvSolvePoly(const CvMat* a, CvMat *r, int maxiter, int) CV_Assert( _r.data == _r0.data ); // check that the array of roots was not reallocated } +#endif // OPENCV_EXCLUDE_C_API // Common constants for dispatched code diff --git a/modules/core/src/matmul.dispatch.cpp b/modules/core/src/matmul.dispatch.cpp index a9b82aee88..e81064ec16 100644 --- a/modules/core/src/matmul.dispatch.cpp +++ b/modules/core/src/matmul.dispatch.cpp @@ -999,8 +999,79 @@ double Mat::dot(InputArray _mat) const return r; } + +#ifdef HAVE_OPENCL + +static bool ocl_dot( InputArray _src1, InputArray _src2, double & res ) +{ + UMat src1 = _src1.getUMat().reshape(1), src2 = _src2.getUMat().reshape(1); + + int type = src1.type(), depth = CV_MAT_DEPTH(type), + kercn = ocl::predictOptimalVectorWidth(src1, src2); + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + + if ( !doubleSupport && depth == CV_64F ) + return false; + + int dbsize = ocl::Device::getDefault().maxComputeUnits(); + size_t wgs = ocl::Device::getDefault().maxWorkGroupSize(); + int ddepth = std::max(CV_32F, depth); + + int wgs2_aligned = 1; + while (wgs2_aligned < (int)wgs) + wgs2_aligned <<= 1; + wgs2_aligned >>= 1; + + char cvt[40]; + ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, + format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D ddepth=%d -D convertToDT=%s -D OP_DOT " + "-D WGS=%d -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d", + ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(depth), + ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)), + ddepth, ocl::convertTypeStr(depth, ddepth, kercn, cvt), + (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "", + _src1.isContinuous() ? " -D HAVE_SRC_CONT" : "", + _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", kercn)); + if (k.empty()) + return false; + + UMat db(1, dbsize, ddepth); + + ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1), + src2arg = ocl::KernelArg::ReadOnlyNoSize(src2), + dbarg = ocl::KernelArg::PtrWriteOnly(db); + + k.args(src1arg, src1.cols, (int)src1.total(), dbsize, dbarg, src2arg); + + size_t globalsize = dbsize * wgs; + if (k.run(1, &globalsize, &wgs, false)) + { + res = sum(db.getMat(ACCESS_READ))[0]; + return true; + } + return false; +} + +#endif + +double UMat::dot(InputArray m) const +{ + CV_INSTRUMENT_REGION(); + + CV_Assert(m.sameSize(*this) && m.type() == type()); + +#ifdef HAVE_OPENCL + double r = 0; + CV_OCL_RUN_(dims <= 2, ocl_dot(*this, m, r), r) +#endif + + return getMat(ACCESS_READ).dot(m); +} + } // namespace cv:: + +#ifndef OPENCV_EXCLUDE_C_API /****************************************************************************************\ * Earlier API * \****************************************************************************************/ @@ -1225,4 +1296,6 @@ cvBackProjectPCA( const CvArr* proj_arr, const CvArr* avg_arr, CV_Assert(dst0.data == dst.data); } +#endif // OPENCV_EXCLUDE_C_API + /* End of file. */ diff --git a/modules/core/src/matrix_c.cpp b/modules/core/src/matrix_c.cpp index 2fead4100c..baa61bb66f 100644 --- a/modules/core/src/matrix_c.cpp +++ b/modules/core/src/matrix_c.cpp @@ -6,6 +6,7 @@ #include "opencv2/core/mat.hpp" #include "opencv2/core/types_c.h" +#ifndef OPENCV_EXCLUDE_C_API // glue CvMatND cvMatND(const cv::Mat& m) @@ -360,7 +361,6 @@ cvSort( const CvArr* _src, CvArr* _dst, CvArr* _idx, int flags ) } } - CV_IMPL int cvKMeans2( const CvArr* _samples, int cluster_count, CvArr* _labels, CvTermCriteria termcrit, int attempts, CvRNG*, @@ -389,3 +389,5 @@ cvKMeans2( const CvArr* _samples, int cluster_count, CvArr* _labels, *_compactness = compactness; return 1; } + +#endif // OPENCV_EXCLUDE_C_API diff --git a/modules/core/src/matrix_operations.cpp b/modules/core/src/matrix_operations.cpp index 6f863b8871..ca8edc4771 100644 --- a/modules/core/src/matrix_operations.cpp +++ b/modules/core/src/matrix_operations.cpp @@ -226,6 +226,23 @@ void cv::setIdentity( InputOutputArray _m, const Scalar& s ) } } + +namespace cv { + +UMat UMat::eye(int rows, int cols, int type) +{ + return UMat::eye(Size(cols, rows), type); +} + +UMat UMat::eye(Size size, int type) +{ + UMat m(size, type); + setIdentity(m); + return m; +} + +} // namespace + //////////////////////////////////////////// trace /////////////////////////////////////////// cv::Scalar cv::trace( InputArray _m ) @@ -260,285 +277,6 @@ cv::Scalar cv::trace( InputArray _m ) return cv::sum(m.diag()); } -////////////////////////////////////// transpose ///////////////////////////////////////// - -namespace cv -{ - -template static void -transpose_( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz ) -{ - int i=0, j, m = sz.width, n = sz.height; - - #if CV_ENABLE_UNROLLED - for(; i <= m - 4; i += 4 ) - { - T* d0 = (T*)(dst + dstep*i); - T* d1 = (T*)(dst + dstep*(i+1)); - T* d2 = (T*)(dst + dstep*(i+2)); - T* d3 = (T*)(dst + dstep*(i+3)); - - for( j = 0; j <= n - 4; j += 4 ) - { - const T* s0 = (const T*)(src + i*sizeof(T) + sstep*j); - const T* s1 = (const T*)(src + i*sizeof(T) + sstep*(j+1)); - const T* s2 = (const T*)(src + i*sizeof(T) + sstep*(j+2)); - const T* s3 = (const T*)(src + i*sizeof(T) + sstep*(j+3)); - - d0[j] = s0[0]; d0[j+1] = s1[0]; d0[j+2] = s2[0]; d0[j+3] = s3[0]; - d1[j] = s0[1]; d1[j+1] = s1[1]; d1[j+2] = s2[1]; d1[j+3] = s3[1]; - d2[j] = s0[2]; d2[j+1] = s1[2]; d2[j+2] = s2[2]; d2[j+3] = s3[2]; - d3[j] = s0[3]; d3[j+1] = s1[3]; d3[j+2] = s2[3]; d3[j+3] = s3[3]; - } - - for( ; j < n; j++ ) - { - const T* s0 = (const T*)(src + i*sizeof(T) + j*sstep); - d0[j] = s0[0]; d1[j] = s0[1]; d2[j] = s0[2]; d3[j] = s0[3]; - } - } - #endif - for( ; i < m; i++ ) - { - T* d0 = (T*)(dst + dstep*i); - j = 0; - #if CV_ENABLE_UNROLLED - for(; j <= n - 4; j += 4 ) - { - const T* s0 = (const T*)(src + i*sizeof(T) + sstep*j); - const T* s1 = (const T*)(src + i*sizeof(T) + sstep*(j+1)); - const T* s2 = (const T*)(src + i*sizeof(T) + sstep*(j+2)); - const T* s3 = (const T*)(src + i*sizeof(T) + sstep*(j+3)); - - d0[j] = s0[0]; d0[j+1] = s1[0]; d0[j+2] = s2[0]; d0[j+3] = s3[0]; - } - #endif - for( ; j < n; j++ ) - { - const T* s0 = (const T*)(src + i*sizeof(T) + j*sstep); - d0[j] = s0[0]; - } - } -} - -template static void -transposeI_( uchar* data, size_t step, int n ) -{ - for( int i = 0; i < n; i++ ) - { - T* row = (T*)(data + step*i); - uchar* data1 = data + i*sizeof(T); - for( int j = i+1; j < n; j++ ) - std::swap( row[j], *(T*)(data1 + step*j) ); - } -} - -typedef void (*TransposeFunc)( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz ); -typedef void (*TransposeInplaceFunc)( uchar* data, size_t step, int n ); - -#define DEF_TRANSPOSE_FUNC(suffix, type) \ -static void transpose_##suffix( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz ) \ -{ transpose_(src, sstep, dst, dstep, sz); } \ -\ -static void transposeI_##suffix( uchar* data, size_t step, int n ) \ -{ transposeI_(data, step, n); } - -DEF_TRANSPOSE_FUNC(8u, uchar) -DEF_TRANSPOSE_FUNC(16u, ushort) -DEF_TRANSPOSE_FUNC(8uC3, Vec3b) -DEF_TRANSPOSE_FUNC(32s, int) -DEF_TRANSPOSE_FUNC(16uC3, Vec3s) -DEF_TRANSPOSE_FUNC(32sC2, Vec2i) -DEF_TRANSPOSE_FUNC(32sC3, Vec3i) -DEF_TRANSPOSE_FUNC(32sC4, Vec4i) -DEF_TRANSPOSE_FUNC(32sC6, Vec6i) -DEF_TRANSPOSE_FUNC(32sC8, Vec8i) - -static TransposeFunc transposeTab[] = -{ - 0, transpose_8u, transpose_16u, transpose_8uC3, transpose_32s, 0, transpose_16uC3, 0, - transpose_32sC2, 0, 0, 0, transpose_32sC3, 0, 0, 0, transpose_32sC4, - 0, 0, 0, 0, 0, 0, 0, transpose_32sC6, 0, 0, 0, 0, 0, 0, 0, transpose_32sC8 -}; - -static TransposeInplaceFunc transposeInplaceTab[] = -{ - 0, transposeI_8u, transposeI_16u, transposeI_8uC3, transposeI_32s, 0, transposeI_16uC3, 0, - transposeI_32sC2, 0, 0, 0, transposeI_32sC3, 0, 0, 0, transposeI_32sC4, - 0, 0, 0, 0, 0, 0, 0, transposeI_32sC6, 0, 0, 0, 0, 0, 0, 0, transposeI_32sC8 -}; - -#ifdef HAVE_OPENCL - -static bool ocl_transpose( InputArray _src, OutputArray _dst ) -{ - const ocl::Device & dev = ocl::Device::getDefault(); - const int TILE_DIM = 32, BLOCK_ROWS = 8; - int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type), - rowsPerWI = dev.isIntel() ? 4 : 1; - - UMat src = _src.getUMat(); - _dst.create(src.cols, src.rows, type); - UMat dst = _dst.getUMat(); - - String kernelName("transpose"); - bool inplace = dst.u == src.u; - - if (inplace) - { - CV_Assert(dst.cols == dst.rows); - kernelName += "_inplace"; - } - else - { - // check required local memory size - size_t required_local_memory = (size_t) TILE_DIM*(TILE_DIM+1)*CV_ELEM_SIZE(type); - if (required_local_memory > ocl::Device::getDefault().localMemSize()) - return false; - } - - ocl::Kernel k(kernelName.c_str(), ocl::core::transpose_oclsrc, - format("-D T=%s -D T1=%s -D cn=%d -D TILE_DIM=%d -D BLOCK_ROWS=%d -D rowsPerWI=%d%s", - ocl::memopTypeToStr(type), ocl::memopTypeToStr(depth), - cn, TILE_DIM, BLOCK_ROWS, rowsPerWI, inplace ? " -D INPLACE" : "")); - if (k.empty()) - return false; - - if (inplace) - k.args(ocl::KernelArg::ReadWriteNoSize(dst), dst.rows); - else - k.args(ocl::KernelArg::ReadOnly(src), - ocl::KernelArg::WriteOnlyNoSize(dst)); - - size_t localsize[2] = { TILE_DIM, BLOCK_ROWS }; - size_t globalsize[2] = { (size_t)src.cols, inplace ? ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI : (divUp((size_t)src.rows, TILE_DIM) * BLOCK_ROWS) }; - - if (inplace && dev.isIntel()) - { - localsize[0] = 16; - localsize[1] = dev.maxWorkGroupSize() / localsize[0]; - } - - return k.run(2, globalsize, localsize, false); -} - -#endif - -#ifdef HAVE_IPP -static bool ipp_transpose( Mat &src, Mat &dst ) -{ - CV_INSTRUMENT_REGION_IPP(); - - int type = src.type(); - typedef IppStatus (CV_STDCALL * IppiTranspose)(const void * pSrc, int srcStep, void * pDst, int dstStep, IppiSize roiSize); - typedef IppStatus (CV_STDCALL * IppiTransposeI)(const void * pSrcDst, int srcDstStep, IppiSize roiSize); - IppiTranspose ippiTranspose = 0; - IppiTransposeI ippiTranspose_I = 0; - - if (dst.data == src.data && dst.cols == dst.rows) - { - CV_SUPPRESS_DEPRECATED_START - ippiTranspose_I = - type == CV_8UC1 ? (IppiTransposeI)ippiTranspose_8u_C1IR : - type == CV_8UC3 ? (IppiTransposeI)ippiTranspose_8u_C3IR : - type == CV_8UC4 ? (IppiTransposeI)ippiTranspose_8u_C4IR : - type == CV_16UC1 ? (IppiTransposeI)ippiTranspose_16u_C1IR : - type == CV_16UC3 ? (IppiTransposeI)ippiTranspose_16u_C3IR : - type == CV_16UC4 ? (IppiTransposeI)ippiTranspose_16u_C4IR : - type == CV_16SC1 ? (IppiTransposeI)ippiTranspose_16s_C1IR : - type == CV_16SC3 ? (IppiTransposeI)ippiTranspose_16s_C3IR : - type == CV_16SC4 ? (IppiTransposeI)ippiTranspose_16s_C4IR : - type == CV_32SC1 ? (IppiTransposeI)ippiTranspose_32s_C1IR : - type == CV_32SC3 ? (IppiTransposeI)ippiTranspose_32s_C3IR : - type == CV_32SC4 ? (IppiTransposeI)ippiTranspose_32s_C4IR : - type == CV_32FC1 ? (IppiTransposeI)ippiTranspose_32f_C1IR : - type == CV_32FC3 ? (IppiTransposeI)ippiTranspose_32f_C3IR : - type == CV_32FC4 ? (IppiTransposeI)ippiTranspose_32f_C4IR : 0; - CV_SUPPRESS_DEPRECATED_END - } - else - { - ippiTranspose = - type == CV_8UC1 ? (IppiTranspose)ippiTranspose_8u_C1R : - type == CV_8UC3 ? (IppiTranspose)ippiTranspose_8u_C3R : - type == CV_8UC4 ? (IppiTranspose)ippiTranspose_8u_C4R : - type == CV_16UC1 ? (IppiTranspose)ippiTranspose_16u_C1R : - type == CV_16UC3 ? (IppiTranspose)ippiTranspose_16u_C3R : - type == CV_16UC4 ? (IppiTranspose)ippiTranspose_16u_C4R : - type == CV_16SC1 ? (IppiTranspose)ippiTranspose_16s_C1R : - type == CV_16SC3 ? (IppiTranspose)ippiTranspose_16s_C3R : - type == CV_16SC4 ? (IppiTranspose)ippiTranspose_16s_C4R : - type == CV_32SC1 ? (IppiTranspose)ippiTranspose_32s_C1R : - type == CV_32SC3 ? (IppiTranspose)ippiTranspose_32s_C3R : - type == CV_32SC4 ? (IppiTranspose)ippiTranspose_32s_C4R : - type == CV_32FC1 ? (IppiTranspose)ippiTranspose_32f_C1R : - type == CV_32FC3 ? (IppiTranspose)ippiTranspose_32f_C3R : - type == CV_32FC4 ? (IppiTranspose)ippiTranspose_32f_C4R : 0; - } - - IppiSize roiSize = { src.cols, src.rows }; - if (ippiTranspose != 0) - { - if (CV_INSTRUMENT_FUN_IPP(ippiTranspose, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, roiSize) >= 0) - return true; - } - else if (ippiTranspose_I != 0) - { - if (CV_INSTRUMENT_FUN_IPP(ippiTranspose_I, dst.ptr(), (int)dst.step, roiSize) >= 0) - return true; - } - return false; -} -#endif - -} - - -void cv::transpose( InputArray _src, OutputArray _dst ) -{ - CV_INSTRUMENT_REGION(); - - int type = _src.type(), esz = CV_ELEM_SIZE(type); - CV_Assert( _src.dims() <= 2 && esz <= 32 ); - - CV_OCL_RUN(_dst.isUMat(), - ocl_transpose(_src, _dst)) - - Mat src = _src.getMat(); - if( src.empty() ) - { - _dst.release(); - return; - } - - _dst.create(src.cols, src.rows, src.type()); - Mat dst = _dst.getMat(); - - // handle the case of single-column/single-row matrices, stored in STL vectors. - if( src.rows != dst.cols || src.cols != dst.rows ) - { - CV_Assert( src.size() == dst.size() && (src.cols == 1 || src.rows == 1) ); - src.copyTo(dst); - return; - } - - CV_IPP_RUN_FAST(ipp_transpose(src, dst)) - - if( dst.data == src.data ) - { - TransposeInplaceFunc func = transposeInplaceTab[esz]; - CV_Assert( func != 0 ); - CV_Assert( dst.cols == dst.rows ); - func( dst.ptr(), dst.step, dst.rows ); - } - else - { - TransposeFunc func = transposeTab[esz]; - CV_Assert( func != 0 ); - func( src.ptr(), src.step, dst.ptr(), dst.step, src.size() ); - } -} - ////////////////////////////////////// completeSymm ///////////////////////////////////////// diff --git a/modules/core/src/matrix_transform.cpp b/modules/core/src/matrix_transform.cpp new file mode 100644 index 0000000000..37bc273b4d --- /dev/null +++ b/modules/core/src/matrix_transform.cpp @@ -0,0 +1,770 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#include "precomp.hpp" +#include "opencl_kernels_core.hpp" + +namespace cv { + +////////////////////////////////////// transpose ///////////////////////////////////////// + +template static void +transpose_( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz ) +{ + int i=0, j, m = sz.width, n = sz.height; + + #if CV_ENABLE_UNROLLED + for(; i <= m - 4; i += 4 ) + { + T* d0 = (T*)(dst + dstep*i); + T* d1 = (T*)(dst + dstep*(i+1)); + T* d2 = (T*)(dst + dstep*(i+2)); + T* d3 = (T*)(dst + dstep*(i+3)); + + for( j = 0; j <= n - 4; j += 4 ) + { + const T* s0 = (const T*)(src + i*sizeof(T) + sstep*j); + const T* s1 = (const T*)(src + i*sizeof(T) + sstep*(j+1)); + const T* s2 = (const T*)(src + i*sizeof(T) + sstep*(j+2)); + const T* s3 = (const T*)(src + i*sizeof(T) + sstep*(j+3)); + + d0[j] = s0[0]; d0[j+1] = s1[0]; d0[j+2] = s2[0]; d0[j+3] = s3[0]; + d1[j] = s0[1]; d1[j+1] = s1[1]; d1[j+2] = s2[1]; d1[j+3] = s3[1]; + d2[j] = s0[2]; d2[j+1] = s1[2]; d2[j+2] = s2[2]; d2[j+3] = s3[2]; + d3[j] = s0[3]; d3[j+1] = s1[3]; d3[j+2] = s2[3]; d3[j+3] = s3[3]; + } + + for( ; j < n; j++ ) + { + const T* s0 = (const T*)(src + i*sizeof(T) + j*sstep); + d0[j] = s0[0]; d1[j] = s0[1]; d2[j] = s0[2]; d3[j] = s0[3]; + } + } + #endif + for( ; i < m; i++ ) + { + T* d0 = (T*)(dst + dstep*i); + j = 0; + #if CV_ENABLE_UNROLLED + for(; j <= n - 4; j += 4 ) + { + const T* s0 = (const T*)(src + i*sizeof(T) + sstep*j); + const T* s1 = (const T*)(src + i*sizeof(T) + sstep*(j+1)); + const T* s2 = (const T*)(src + i*sizeof(T) + sstep*(j+2)); + const T* s3 = (const T*)(src + i*sizeof(T) + sstep*(j+3)); + + d0[j] = s0[0]; d0[j+1] = s1[0]; d0[j+2] = s2[0]; d0[j+3] = s3[0]; + } + #endif + for( ; j < n; j++ ) + { + const T* s0 = (const T*)(src + i*sizeof(T) + j*sstep); + d0[j] = s0[0]; + } + } +} + +template static void +transposeI_( uchar* data, size_t step, int n ) +{ + for( int i = 0; i < n; i++ ) + { + T* row = (T*)(data + step*i); + uchar* data1 = data + i*sizeof(T); + for( int j = i+1; j < n; j++ ) + std::swap( row[j], *(T*)(data1 + step*j) ); + } +} + +typedef void (*TransposeFunc)( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz ); +typedef void (*TransposeInplaceFunc)( uchar* data, size_t step, int n ); + +#define DEF_TRANSPOSE_FUNC(suffix, type) \ +static void transpose_##suffix( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz ) \ +{ transpose_(src, sstep, dst, dstep, sz); } \ +\ +static void transposeI_##suffix( uchar* data, size_t step, int n ) \ +{ transposeI_(data, step, n); } + +DEF_TRANSPOSE_FUNC(8u, uchar) +DEF_TRANSPOSE_FUNC(16u, ushort) +DEF_TRANSPOSE_FUNC(8uC3, Vec3b) +DEF_TRANSPOSE_FUNC(32s, int) +DEF_TRANSPOSE_FUNC(16uC3, Vec3s) +DEF_TRANSPOSE_FUNC(32sC2, Vec2i) +DEF_TRANSPOSE_FUNC(32sC3, Vec3i) +DEF_TRANSPOSE_FUNC(32sC4, Vec4i) +DEF_TRANSPOSE_FUNC(32sC6, Vec6i) +DEF_TRANSPOSE_FUNC(32sC8, Vec8i) + +static TransposeFunc transposeTab[] = +{ + 0, transpose_8u, transpose_16u, transpose_8uC3, transpose_32s, 0, transpose_16uC3, 0, + transpose_32sC2, 0, 0, 0, transpose_32sC3, 0, 0, 0, transpose_32sC4, + 0, 0, 0, 0, 0, 0, 0, transpose_32sC6, 0, 0, 0, 0, 0, 0, 0, transpose_32sC8 +}; + +static TransposeInplaceFunc transposeInplaceTab[] = +{ + 0, transposeI_8u, transposeI_16u, transposeI_8uC3, transposeI_32s, 0, transposeI_16uC3, 0, + transposeI_32sC2, 0, 0, 0, transposeI_32sC3, 0, 0, 0, transposeI_32sC4, + 0, 0, 0, 0, 0, 0, 0, transposeI_32sC6, 0, 0, 0, 0, 0, 0, 0, transposeI_32sC8 +}; + +#ifdef HAVE_OPENCL + +static bool ocl_transpose( InputArray _src, OutputArray _dst ) +{ + const ocl::Device & dev = ocl::Device::getDefault(); + const int TILE_DIM = 32, BLOCK_ROWS = 8; + int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type), + rowsPerWI = dev.isIntel() ? 4 : 1; + + UMat src = _src.getUMat(); + _dst.create(src.cols, src.rows, type); + UMat dst = _dst.getUMat(); + + String kernelName("transpose"); + bool inplace = dst.u == src.u; + + if (inplace) + { + CV_Assert(dst.cols == dst.rows); + kernelName += "_inplace"; + } + else + { + // check required local memory size + size_t required_local_memory = (size_t) TILE_DIM*(TILE_DIM+1)*CV_ELEM_SIZE(type); + if (required_local_memory > ocl::Device::getDefault().localMemSize()) + return false; + } + + ocl::Kernel k(kernelName.c_str(), ocl::core::transpose_oclsrc, + format("-D T=%s -D T1=%s -D cn=%d -D TILE_DIM=%d -D BLOCK_ROWS=%d -D rowsPerWI=%d%s", + ocl::memopTypeToStr(type), ocl::memopTypeToStr(depth), + cn, TILE_DIM, BLOCK_ROWS, rowsPerWI, inplace ? " -D INPLACE" : "")); + if (k.empty()) + return false; + + if (inplace) + k.args(ocl::KernelArg::ReadWriteNoSize(dst), dst.rows); + else + k.args(ocl::KernelArg::ReadOnly(src), + ocl::KernelArg::WriteOnlyNoSize(dst)); + + size_t localsize[2] = { TILE_DIM, BLOCK_ROWS }; + size_t globalsize[2] = { (size_t)src.cols, inplace ? ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI : (divUp((size_t)src.rows, TILE_DIM) * BLOCK_ROWS) }; + + if (inplace && dev.isIntel()) + { + localsize[0] = 16; + localsize[1] = dev.maxWorkGroupSize() / localsize[0]; + } + + return k.run(2, globalsize, localsize, false); +} + +#endif + +#ifdef HAVE_IPP +static bool ipp_transpose( Mat &src, Mat &dst ) +{ + CV_INSTRUMENT_REGION_IPP(); + + int type = src.type(); + typedef IppStatus (CV_STDCALL * IppiTranspose)(const void * pSrc, int srcStep, void * pDst, int dstStep, IppiSize roiSize); + typedef IppStatus (CV_STDCALL * IppiTransposeI)(const void * pSrcDst, int srcDstStep, IppiSize roiSize); + IppiTranspose ippiTranspose = 0; + IppiTransposeI ippiTranspose_I = 0; + + if (dst.data == src.data && dst.cols == dst.rows) + { + CV_SUPPRESS_DEPRECATED_START + ippiTranspose_I = + type == CV_8UC1 ? (IppiTransposeI)ippiTranspose_8u_C1IR : + type == CV_8UC3 ? (IppiTransposeI)ippiTranspose_8u_C3IR : + type == CV_8UC4 ? (IppiTransposeI)ippiTranspose_8u_C4IR : + type == CV_16UC1 ? (IppiTransposeI)ippiTranspose_16u_C1IR : + type == CV_16UC3 ? (IppiTransposeI)ippiTranspose_16u_C3IR : + type == CV_16UC4 ? (IppiTransposeI)ippiTranspose_16u_C4IR : + type == CV_16SC1 ? (IppiTransposeI)ippiTranspose_16s_C1IR : + type == CV_16SC3 ? (IppiTransposeI)ippiTranspose_16s_C3IR : + type == CV_16SC4 ? (IppiTransposeI)ippiTranspose_16s_C4IR : + type == CV_32SC1 ? (IppiTransposeI)ippiTranspose_32s_C1IR : + type == CV_32SC3 ? (IppiTransposeI)ippiTranspose_32s_C3IR : + type == CV_32SC4 ? (IppiTransposeI)ippiTranspose_32s_C4IR : + type == CV_32FC1 ? (IppiTransposeI)ippiTranspose_32f_C1IR : + type == CV_32FC3 ? (IppiTransposeI)ippiTranspose_32f_C3IR : + type == CV_32FC4 ? (IppiTransposeI)ippiTranspose_32f_C4IR : 0; + CV_SUPPRESS_DEPRECATED_END + } + else + { + ippiTranspose = + type == CV_8UC1 ? (IppiTranspose)ippiTranspose_8u_C1R : + type == CV_8UC3 ? (IppiTranspose)ippiTranspose_8u_C3R : + type == CV_8UC4 ? (IppiTranspose)ippiTranspose_8u_C4R : + type == CV_16UC1 ? (IppiTranspose)ippiTranspose_16u_C1R : + type == CV_16UC3 ? (IppiTranspose)ippiTranspose_16u_C3R : + type == CV_16UC4 ? (IppiTranspose)ippiTranspose_16u_C4R : + type == CV_16SC1 ? (IppiTranspose)ippiTranspose_16s_C1R : + type == CV_16SC3 ? (IppiTranspose)ippiTranspose_16s_C3R : + type == CV_16SC4 ? (IppiTranspose)ippiTranspose_16s_C4R : + type == CV_32SC1 ? (IppiTranspose)ippiTranspose_32s_C1R : + type == CV_32SC3 ? (IppiTranspose)ippiTranspose_32s_C3R : + type == CV_32SC4 ? (IppiTranspose)ippiTranspose_32s_C4R : + type == CV_32FC1 ? (IppiTranspose)ippiTranspose_32f_C1R : + type == CV_32FC3 ? (IppiTranspose)ippiTranspose_32f_C3R : + type == CV_32FC4 ? (IppiTranspose)ippiTranspose_32f_C4R : 0; + } + + IppiSize roiSize = { src.cols, src.rows }; + if (ippiTranspose != 0) + { + if (CV_INSTRUMENT_FUN_IPP(ippiTranspose, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, roiSize) >= 0) + return true; + } + else if (ippiTranspose_I != 0) + { + if (CV_INSTRUMENT_FUN_IPP(ippiTranspose_I, dst.ptr(), (int)dst.step, roiSize) >= 0) + return true; + } + return false; +} +#endif + + +void transpose( InputArray _src, OutputArray _dst ) +{ + CV_INSTRUMENT_REGION(); + + int type = _src.type(), esz = CV_ELEM_SIZE(type); + CV_Assert( _src.dims() <= 2 && esz <= 32 ); + + CV_OCL_RUN(_dst.isUMat(), + ocl_transpose(_src, _dst)) + + Mat src = _src.getMat(); + if( src.empty() ) + { + _dst.release(); + return; + } + + _dst.create(src.cols, src.rows, src.type()); + Mat dst = _dst.getMat(); + + // handle the case of single-column/single-row matrices, stored in STL vectors. + if( src.rows != dst.cols || src.cols != dst.rows ) + { + CV_Assert( src.size() == dst.size() && (src.cols == 1 || src.rows == 1) ); + src.copyTo(dst); + return; + } + + CV_IPP_RUN_FAST(ipp_transpose(src, dst)) + + if( dst.data == src.data ) + { + TransposeInplaceFunc func = transposeInplaceTab[esz]; + CV_Assert( func != 0 ); + CV_Assert( dst.cols == dst.rows ); + func( dst.ptr(), dst.step, dst.rows ); + } + else + { + TransposeFunc func = transposeTab[esz]; + CV_Assert( func != 0 ); + func( src.ptr(), src.step, dst.ptr(), dst.step, src.size() ); + } +} + + +#if CV_SIMD128 +template CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) +{ + typedef typename V::lane_type T; + int end = (int)(size.width*esz); + int width = (end + 1)/2; + int width_1 = width & -v_uint8x16::nlanes; + int i, j; + +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(src, dst)); +#endif + + for( ; size.height--; src += sstep, dst += dstep ) + { + for( i = 0, j = end; i < width_1; i += v_uint8x16::nlanes, j -= v_uint8x16::nlanes ) + { + V t0, t1; + + t0 = v_load((T*)((uchar*)src + i)); + t1 = v_load((T*)((uchar*)src + j - v_uint8x16::nlanes)); + t0 = v_reverse(t0); + t1 = v_reverse(t1); + v_store((T*)(dst + j - v_uint8x16::nlanes), t0); + v_store((T*)(dst + i), t1); + } + if (isAligned(src, dst)) + { + for ( ; i < width; i += sizeof(T), j -= sizeof(T) ) + { + T t0, t1; + + t0 = *((T*)((uchar*)src + i)); + t1 = *((T*)((uchar*)src + j - sizeof(T))); + *((T*)(dst + j - sizeof(T))) = t0; + *((T*)(dst + i)) = t1; + } + } + else + { + for ( ; i < width; i += sizeof(T), j -= sizeof(T) ) + { + for (int k = 0; k < (int)sizeof(T); k++) + { + uchar t0, t1; + + t0 = *((uchar*)src + i + k); + t1 = *((uchar*)src + j + k - sizeof(T)); + *(dst + j + k - sizeof(T)) = t0; + *(dst + i + k) = t1; + } + } + } + } +} + +template CV_ALWAYS_INLINE void flipHoriz_double( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) +{ + int end = (int)(size.width*esz); + int width = (end + 1)/2; + +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(src, dst)); + CV_Assert(isAligned(src, dst)); +#endif + + for( ; size.height--; src += sstep, dst += dstep ) + { + for ( int i = 0, j = end; i < width; i += sizeof(T1) + sizeof(T2), j -= sizeof(T1) + sizeof(T2) ) + { + T1 t0, t1; + T2 t2, t3; + + t0 = *((T1*)((uchar*)src + i)); + t2 = *((T2*)((uchar*)src + i + sizeof(T1))); + t1 = *((T1*)((uchar*)src + j - sizeof(T1) - sizeof(T2))); + t3 = *((T2*)((uchar*)src + j - sizeof(T2))); + *((T1*)(dst + j - sizeof(T1) - sizeof(T2))) = t0; + *((T2*)(dst + j - sizeof(T2))) = t2; + *((T1*)(dst + i)) = t1; + *((T2*)(dst + i + sizeof(T1))) = t3; + } + } +} +#endif + +static void +flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) +{ +#if CV_SIMD +#if CV_STRONG_ALIGNMENT + size_t alignmentMark = ((size_t)src)|((size_t)dst)|sstep|dstep; +#endif + if (esz == 2 * v_uint8x16::nlanes) + { + int end = (int)(size.width*esz); + int width = end/2; + + for( ; size.height--; src += sstep, dst += dstep ) + { + for( int i = 0, j = end - 2 * v_uint8x16::nlanes; i < width; i += 2 * v_uint8x16::nlanes, j -= 2 * v_uint8x16::nlanes ) + { +#if CV_SIMD256 + v_uint8x32 t0, t1; + + t0 = v256_load((uchar*)src + i); + t1 = v256_load((uchar*)src + j); + v_store(dst + j, t0); + v_store(dst + i, t1); +#else + v_uint8x16 t0, t1, t2, t3; + + t0 = v_load((uchar*)src + i); + t1 = v_load((uchar*)src + i + v_uint8x16::nlanes); + t2 = v_load((uchar*)src + j); + t3 = v_load((uchar*)src + j + v_uint8x16::nlanes); + v_store(dst + j, t0); + v_store(dst + j + v_uint8x16::nlanes, t1); + v_store(dst + i, t2); + v_store(dst + i + v_uint8x16::nlanes, t3); +#endif + } + } + } + else if (esz == v_uint8x16::nlanes) + { + int end = (int)(size.width*esz); + int width = end/2; + + for( ; size.height--; src += sstep, dst += dstep ) + { + for( int i = 0, j = end - v_uint8x16::nlanes; i < width; i += v_uint8x16::nlanes, j -= v_uint8x16::nlanes ) + { + v_uint8x16 t0, t1; + + t0 = v_load((uchar*)src + i); + t1 = v_load((uchar*)src + j); + v_store(dst + j, t0); + v_store(dst + i, t1); + } + } + } + else if (esz == 8 +#if CV_STRONG_ALIGNMENT + && isAligned(alignmentMark) +#endif + ) + { + flipHoriz_single(src, sstep, dst, dstep, size, esz); + } + else if (esz == 4 +#if CV_STRONG_ALIGNMENT + && isAligned(alignmentMark) +#endif + ) + { + flipHoriz_single(src, sstep, dst, dstep, size, esz); + } + else if (esz == 2 +#if CV_STRONG_ALIGNMENT + && isAligned(alignmentMark) +#endif + ) + { + flipHoriz_single(src, sstep, dst, dstep, size, esz); + } + else if (esz == 1) + { + flipHoriz_single(src, sstep, dst, dstep, size, esz); + } + else if (esz == 24 +#if CV_STRONG_ALIGNMENT + && isAligned(alignmentMark) +#endif + ) + { + int end = (int)(size.width*esz); + int width = (end + 1)/2; + + for( ; size.height--; src += sstep, dst += dstep ) + { + for ( int i = 0, j = end; i < width; i += v_uint8x16::nlanes + sizeof(uint64_t), j -= v_uint8x16::nlanes + sizeof(uint64_t) ) + { + v_uint8x16 t0, t1; + uint64_t t2, t3; + + t0 = v_load((uchar*)src + i); + t2 = *((uint64_t*)((uchar*)src + i + v_uint8x16::nlanes)); + t1 = v_load((uchar*)src + j - v_uint8x16::nlanes - sizeof(uint64_t)); + t3 = *((uint64_t*)((uchar*)src + j - sizeof(uint64_t))); + v_store(dst + j - v_uint8x16::nlanes - sizeof(uint64_t), t0); + *((uint64_t*)(dst + j - sizeof(uint64_t))) = t2; + v_store(dst + i, t1); + *((uint64_t*)(dst + i + v_uint8x16::nlanes)) = t3; + } + } + } +#if !CV_STRONG_ALIGNMENT + else if (esz == 12) + { + flipHoriz_double(src, sstep, dst, dstep, size, esz); + } + else if (esz == 6) + { + flipHoriz_double(src, sstep, dst, dstep, size, esz); + } + else if (esz == 3) + { + flipHoriz_double(src, sstep, dst, dstep, size, esz); + } +#endif + else +#endif // CV_SIMD + { + int i, j, limit = (int)(((size.width + 1)/2)*esz); + AutoBuffer _tab(size.width*esz); + int* tab = _tab.data(); + + for( i = 0; i < size.width; i++ ) + for( size_t k = 0; k < esz; k++ ) + tab[i*esz + k] = (int)((size.width - i - 1)*esz + k); + + for( ; size.height--; src += sstep, dst += dstep ) + { + for( i = 0; i < limit; i++ ) + { + j = tab[i]; + uchar t0 = src[i], t1 = src[j]; + dst[i] = t1; dst[j] = t0; + } + } + } +} + +static void +flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size, size_t esz ) +{ + const uchar* src1 = src0 + (size.height - 1)*sstep; + uchar* dst1 = dst0 + (size.height - 1)*dstep; + size.width *= (int)esz; + + for( int y = 0; y < (size.height + 1)/2; y++, src0 += sstep, src1 -= sstep, + dst0 += dstep, dst1 -= dstep ) + { + int i = 0; +#if CV_SIMD +#if CV_STRONG_ALIGNMENT + if (isAligned(src0, src1, dst0, dst1)) +#endif + { + for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH) + { + v_int32 t0 = vx_load((int*)(src0 + i)); + v_int32 t1 = vx_load((int*)(src1 + i)); + vx_store((int*)(dst0 + i), t1); + vx_store((int*)(dst1 + i), t0); + } + } +#if CV_STRONG_ALIGNMENT + else + { + for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH) + { + v_uint8 t0 = vx_load(src0 + i); + v_uint8 t1 = vx_load(src1 + i); + vx_store(dst0 + i, t1); + vx_store(dst1 + i, t0); + } + } +#endif +#endif + + if (isAligned(src0, src1, dst0, dst1)) + { + for( ; i <= size.width - 16; i += 16 ) + { + int t0 = ((int*)(src0 + i))[0]; + int t1 = ((int*)(src1 + i))[0]; + + ((int*)(dst0 + i))[0] = t1; + ((int*)(dst1 + i))[0] = t0; + + t0 = ((int*)(src0 + i))[1]; + t1 = ((int*)(src1 + i))[1]; + + ((int*)(dst0 + i))[1] = t1; + ((int*)(dst1 + i))[1] = t0; + + t0 = ((int*)(src0 + i))[2]; + t1 = ((int*)(src1 + i))[2]; + + ((int*)(dst0 + i))[2] = t1; + ((int*)(dst1 + i))[2] = t0; + + t0 = ((int*)(src0 + i))[3]; + t1 = ((int*)(src1 + i))[3]; + + ((int*)(dst0 + i))[3] = t1; + ((int*)(dst1 + i))[3] = t0; + } + + for( ; i <= size.width - 4; i += 4 ) + { + int t0 = ((int*)(src0 + i))[0]; + int t1 = ((int*)(src1 + i))[0]; + + ((int*)(dst0 + i))[0] = t1; + ((int*)(dst1 + i))[0] = t0; + } + } + + for( ; i < size.width; i++ ) + { + uchar t0 = src0[i]; + uchar t1 = src1[i]; + + dst0[i] = t1; + dst1[i] = t0; + } + } +} + +#ifdef HAVE_OPENCL + +enum { FLIP_COLS = 1 << 0, FLIP_ROWS = 1 << 1, FLIP_BOTH = FLIP_ROWS | FLIP_COLS }; + +static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode ) +{ + CV_Assert(flipCode >= -1 && flipCode <= 1); + + const ocl::Device & dev = ocl::Device::getDefault(); + int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), + flipType, kercn = std::min(ocl::predictOptimalVectorWidth(_src, _dst), 4); + + bool doubleSupport = dev.doubleFPConfig() > 0; + if (!doubleSupport && depth == CV_64F) + kercn = cn; + + if (cn > 4) + return false; + + const char * kernelName; + if (flipCode == 0) + kernelName = "arithm_flip_rows", flipType = FLIP_ROWS; + else if (flipCode > 0) + kernelName = "arithm_flip_cols", flipType = FLIP_COLS; + else + kernelName = "arithm_flip_rows_cols", flipType = FLIP_BOTH; + + int pxPerWIy = (dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1; + kercn = (cn!=3 || flipType == FLIP_ROWS) ? std::max(kercn, cn) : cn; + + ocl::Kernel k(kernelName, ocl::core::flip_oclsrc, + format( "-D T=%s -D T1=%s -D DEPTH=%d -D cn=%d -D PIX_PER_WI_Y=%d -D kercn=%d", + kercn != cn ? ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)) : ocl::vecopTypeToStr(CV_MAKE_TYPE(depth, kercn)), + kercn != cn ? ocl::typeToStr(depth) : ocl::vecopTypeToStr(depth), depth, cn, pxPerWIy, kercn)); + if (k.empty()) + return false; + + Size size = _src.size(); + _dst.create(size, type); + UMat src = _src.getUMat(), dst = _dst.getUMat(); + + int cols = size.width * cn / kercn, rows = size.height; + cols = flipType == FLIP_COLS ? (cols + 1) >> 1 : cols; + rows = flipType & FLIP_ROWS ? (rows + 1) >> 1 : rows; + + k.args(ocl::KernelArg::ReadOnlyNoSize(src), + ocl::KernelArg::WriteOnly(dst, cn, kercn), rows, cols); + + size_t maxWorkGroupSize = dev.maxWorkGroupSize(); + CV_Assert(maxWorkGroupSize % 4 == 0); + + size_t globalsize[2] = { (size_t)cols, ((size_t)rows + pxPerWIy - 1) / pxPerWIy }, + localsize[2] = { maxWorkGroupSize / 4, 4 }; + return k.run(2, globalsize, (flipType == FLIP_COLS) && !dev.isIntel() ? localsize : NULL, false); +} + +#endif + +#if defined HAVE_IPP +static bool ipp_flip(Mat &src, Mat &dst, int flip_mode) +{ +#ifdef HAVE_IPP_IW + CV_INSTRUMENT_REGION_IPP(); + + // Details: https://github.com/opencv/opencv/issues/12943 + if (flip_mode <= 0 /* swap rows */ + && cv::ipp::getIppTopFeatures() != ippCPUID_SSE42 + && (int64_t)(src.total()) * src.elemSize() >= CV_BIG_INT(0x80000000)/*2Gb*/ + ) + return false; + + IppiAxis ippMode; + if(flip_mode < 0) + ippMode = ippAxsBoth; + else if(flip_mode == 0) + ippMode = ippAxsHorizontal; + else + ippMode = ippAxsVertical; + + try + { + ::ipp::IwiImage iwSrc = ippiGetImage(src); + ::ipp::IwiImage iwDst = ippiGetImage(dst); + + CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, iwSrc, iwDst, ippMode); + } + catch(const ::ipp::IwException &) + { + return false; + } + + return true; +#else + CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(flip_mode); + return false; +#endif +} +#endif + + +void flip( InputArray _src, OutputArray _dst, int flip_mode ) +{ + CV_INSTRUMENT_REGION(); + + CV_Assert( _src.dims() <= 2 ); + Size size = _src.size(); + + if (flip_mode < 0) + { + if (size.width == 1) + flip_mode = 0; + if (size.height == 1) + flip_mode = 1; + } + + if ((size.width == 1 && flip_mode > 0) || + (size.height == 1 && flip_mode == 0)) + { + return _src.copyTo(_dst); + } + + CV_OCL_RUN( _dst.isUMat(), ocl_flip(_src, _dst, flip_mode)) + + Mat src = _src.getMat(); + int type = src.type(); + _dst.create( size, type ); + Mat dst = _dst.getMat(); + + CV_IPP_RUN_FAST(ipp_flip(src, dst, flip_mode)); + + size_t esz = CV_ELEM_SIZE(type); + + if( flip_mode <= 0 ) + flipVert( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz ); + else + flipHoriz( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz ); + + if( flip_mode < 0 ) + flipHoriz( dst.ptr(), dst.step, dst.ptr(), dst.step, dst.size(), esz ); +} + +void rotate(InputArray _src, OutputArray _dst, int rotateMode) +{ + CV_Assert(_src.dims() <= 2); + + switch (rotateMode) + { + case ROTATE_90_CLOCKWISE: + transpose(_src, _dst); + flip(_dst, _dst, 1); + break; + case ROTATE_180: + flip(_src, _dst, -1); + break; + case ROTATE_90_COUNTERCLOCKWISE: + transpose(_src, _dst); + flip(_dst, _dst, 0); + break; + default: + break; + } +} + +} // namespace diff --git a/modules/core/src/matrix_wrap.cpp b/modules/core/src/matrix_wrap.cpp index 0d439759cc..53e0d24470 100644 --- a/modules/core/src/matrix_wrap.cpp +++ b/modules/core/src/matrix_wrap.cpp @@ -316,6 +316,7 @@ void _InputArray::getUMatVector(std::vector& umv) const cuda::GpuMat _InputArray::getGpuMat() const { +#ifdef HAVE_CUDA int k = kind(); if (k == CUDA_GPU_MAT) @@ -339,14 +340,22 @@ cuda::GpuMat _InputArray::getGpuMat() const return cuda::GpuMat(); CV_Error(cv::Error::StsNotImplemented, "getGpuMat is available only for cuda::GpuMat and cuda::HostMem"); +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } void _InputArray::getGpuMatVector(std::vector& gpumv) const { +#ifdef HAVE_CUDA int k = kind(); if (k == STD_VECTOR_CUDA_GPU_MAT) { gpumv = *(std::vector*)obj; } +#else + CV_UNUSED(gpumv); + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } ogl::Buffer _InputArray::getOGlBuffer() const { @@ -457,11 +466,15 @@ Size _InputArray::size(int i) const if (k == STD_VECTOR_CUDA_GPU_MAT) { +#ifdef HAVE_CUDA const std::vector& vv = *(const std::vector*)obj; if (i < 0) return vv.empty() ? Size() : Size((int)vv.size(), 1); CV_Assert(i < (int)vv.size()); return vv[i].size(); +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } if( k == STD_VECTOR_UMAT ) @@ -795,6 +808,7 @@ int _InputArray::type(int i) const if (k == STD_VECTOR_CUDA_GPU_MAT) { +#ifdef HAVE_CUDA const std::vector& vv = *(const std::vector*)obj; if (vv.empty()) { @@ -803,6 +817,9 @@ int _InputArray::type(int i) const } CV_Assert(i < (int)vv.size()); return vv[i >= 0 ? i : 0].type(); +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } if( k == OPENGL_BUFFER ) @@ -1164,22 +1181,34 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, int { CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == _sz); CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype); +#ifdef HAVE_CUDA ((cuda::GpuMat*)obj)->create(_sz, mtype); return; +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } if( k == OPENGL_BUFFER && i < 0 && !allowTransposed && fixedDepthMask == 0 ) { CV_Assert(!fixedSize() || ((ogl::Buffer*)obj)->size() == _sz); CV_Assert(!fixedType() || ((ogl::Buffer*)obj)->type() == mtype); +#ifdef HAVE_OPENGL ((ogl::Buffer*)obj)->create(_sz, mtype); return; +#else + CV_Error(Error::StsNotImplemented, "OpenGL support is not enabled in this OpenCV build (missing HAVE_OPENGL)"); +#endif } if( k == CUDA_HOST_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 ) { CV_Assert(!fixedSize() || ((cuda::HostMem*)obj)->size() == _sz); CV_Assert(!fixedType() || ((cuda::HostMem*)obj)->type() == mtype); +#ifdef HAVE_CUDA ((cuda::HostMem*)obj)->create(_sz, mtype); return; +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } int sizes[] = {_sz.height, _sz.width}; create(2, sizes, mtype, i, allowTransposed, fixedDepthMask); @@ -1206,22 +1235,34 @@ void _OutputArray::create(int _rows, int _cols, int mtype, int i, bool allowTran { CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == Size(_cols, _rows)); CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype); +#ifdef HAVE_CUDA ((cuda::GpuMat*)obj)->create(_rows, _cols, mtype); return; +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } if( k == OPENGL_BUFFER && i < 0 && !allowTransposed && fixedDepthMask == 0 ) { CV_Assert(!fixedSize() || ((ogl::Buffer*)obj)->size() == Size(_cols, _rows)); CV_Assert(!fixedType() || ((ogl::Buffer*)obj)->type() == mtype); +#ifdef HAVE_OPENGL ((ogl::Buffer*)obj)->create(_rows, _cols, mtype); return; +#else + CV_Error(Error::StsNotImplemented, "OpenGL support is not enabled in this OpenCV build (missing HAVE_OPENGL)"); +#endif } if( k == CUDA_HOST_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 ) { CV_Assert(!fixedSize() || ((cuda::HostMem*)obj)->size() == Size(_cols, _rows)); CV_Assert(!fixedType() || ((cuda::HostMem*)obj)->type() == mtype); +#ifdef HAVE_CUDA ((cuda::HostMem*)obj)->create(_rows, _cols, mtype); return; +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } int sizes[] = {_rows, _cols}; create(2, sizes, mtype, i, allowTransposed, fixedDepthMask); @@ -1644,20 +1685,32 @@ void _OutputArray::release() const if( k == CUDA_GPU_MAT ) { +#ifdef HAVE_CUDA ((cuda::GpuMat*)obj)->release(); return; +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } if( k == CUDA_HOST_MEM ) { +#ifdef HAVE_CUDA ((cuda::HostMem*)obj)->release(); return; +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } if( k == OPENGL_BUFFER ) { +#ifdef HAVE_OPENGL ((ogl::Buffer*)obj)->release(); return; +#else + CV_Error(Error::StsNotImplemented, "OpenGL support is not enabled in this OpenCV build (missing HAVE_OPENGL)"); +#endif } if( k == NONE ) @@ -1688,8 +1741,12 @@ void _OutputArray::release() const } if (k == STD_VECTOR_CUDA_GPU_MAT) { +#ifdef HAVE_CUDA ((std::vector*)obj)->clear(); return; +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type"); } @@ -1797,9 +1854,13 @@ void _OutputArray::setTo(const _InputArray& arr, const _InputArray & mask) const ((UMat*)obj)->setTo(arr, mask); else if( k == CUDA_GPU_MAT ) { +#ifdef HAVE_CUDA Mat value = arr.getMat(); CV_Assert( checkScalar(value, type(), arr.kind(), _InputArray::CUDA_GPU_MAT) ); ((cuda::GpuMat*)obj)->setTo(Scalar(Vec(value.ptr())), mask); +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } else CV_Error(Error::StsNotImplemented, ""); diff --git a/modules/core/src/norm.cpp b/modules/core/src/norm.cpp index b95cd99bd8..601082783e 100644 --- a/modules/core/src/norm.cpp +++ b/modules/core/src/norm.cpp @@ -205,13 +205,10 @@ int normL1_(const uchar* a, const uchar* b, int n) return d; } -}} //cv::hal +} //cv::hal //================================================================================================== -namespace cv -{ - template int normInf_(const T* src, const uchar* mask, ST* _result, int len, int cn) { @@ -591,12 +588,10 @@ static bool ipp_norm(Mat &src, int normType, Mat &mask, double &result) CV_UNUSED(src); CV_UNUSED(normType); CV_UNUSED(mask); CV_UNUSED(result); #endif return false; -} -#endif - -} // cv:: +} // ipp_norm() +#endif // HAVE_IPP -double cv::norm( InputArray _src, int normType, InputArray _mask ) +double norm( InputArray _src, int normType, InputArray _mask ) { CV_INSTRUMENT_REGION(); @@ -769,9 +764,6 @@ double cv::norm( InputArray _src, int normType, InputArray _mask ) //================================================================================================== #ifdef HAVE_OPENCL - -namespace cv { - static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask, double & result ) { #ifdef __ANDROID__ @@ -826,15 +818,10 @@ static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArr result /= (s2 + DBL_EPSILON); return true; -} - -} - -#endif +} // ocl_norm() +#endif // HAVE_OPENCL #ifdef HAVE_IPP -namespace cv -{ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArray _mask, double &result) { CV_INSTRUMENT_REGION_IPP(); @@ -1060,12 +1047,11 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra CV_UNUSED(_src1); CV_UNUSED(_src2); CV_UNUSED(normType); CV_UNUSED(_mask); CV_UNUSED(result); #endif return false; -} -} -#endif +} // ipp_norm +#endif // HAVE_IPP -double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask ) +double norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask ) { CV_INSTRUMENT_REGION(); @@ -1234,12 +1220,12 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m return result.d; } -cv::Hamming::ResultType cv::Hamming::operator()( const unsigned char* a, const unsigned char* b, int size ) const +cv::Hamming::ResultType Hamming::operator()( const unsigned char* a, const unsigned char* b, int size ) const { return cv::hal::normHamming(a, b, size); } -double cv::PSNR(InputArray _src1, InputArray _src2) +double PSNR(InputArray _src1, InputArray _src2) { CV_INSTRUMENT_REGION(); @@ -1249,3 +1235,141 @@ double cv::PSNR(InputArray _src1, InputArray _src2) double diff = std::sqrt(norm(_src1, _src2, NORM_L2SQR)/(_src1.total()*_src1.channels())); return 20*log10(255./(diff+DBL_EPSILON)); } + + +#ifdef HAVE_OPENCL +static bool ocl_normalize( InputArray _src, InputOutputArray _dst, InputArray _mask, int dtype, + double scale, double delta ) +{ + UMat src = _src.getUMat(); + + if( _mask.empty() ) + src.convertTo( _dst, dtype, scale, delta ); + else if (src.channels() <= 4) + { + const ocl::Device & dev = ocl::Device::getDefault(); + + int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), + ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32F, std::max(sdepth, ddepth)), + rowsPerWI = dev.isIntel() ? 4 : 1; + + float fscale = static_cast(scale), fdelta = static_cast(delta); + bool haveScale = std::fabs(scale - 1) > DBL_EPSILON, + haveZeroScale = !(std::fabs(scale) > DBL_EPSILON), + haveDelta = std::fabs(delta) > DBL_EPSILON, + doubleSupport = dev.doubleFPConfig() > 0; + + if (!haveScale && !haveDelta && stype == dtype) + { + _src.copyTo(_dst, _mask); + return true; + } + if (haveZeroScale) + { + _dst.setTo(Scalar(delta), _mask); + return true; + } + + if ((sdepth == CV_64F || ddepth == CV_64F) && !doubleSupport) + return false; + + char cvt[2][40]; + String opts = format("-D srcT=%s -D dstT=%s -D convertToWT=%s -D cn=%d -D rowsPerWI=%d" + " -D convertToDT=%s -D workT=%s%s%s%s -D srcT1=%s -D dstT1=%s", + ocl::typeToStr(stype), ocl::typeToStr(dtype), + ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), cn, + rowsPerWI, ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]), + ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), + doubleSupport ? " -D DOUBLE_SUPPORT" : "", + haveScale ? " -D HAVE_SCALE" : "", + haveDelta ? " -D HAVE_DELTA" : "", + ocl::typeToStr(sdepth), ocl::typeToStr(ddepth)); + + ocl::Kernel k("normalizek", ocl::core::normalize_oclsrc, opts); + if (k.empty()) + return false; + + UMat mask = _mask.getUMat(), dst = _dst.getUMat(); + + ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), + maskarg = ocl::KernelArg::ReadOnlyNoSize(mask), + dstarg = ocl::KernelArg::ReadWrite(dst); + + if (haveScale) + { + if (haveDelta) + k.args(srcarg, maskarg, dstarg, fscale, fdelta); + else + k.args(srcarg, maskarg, dstarg, fscale); + } + else + { + if (haveDelta) + k.args(srcarg, maskarg, dstarg, fdelta); + else + k.args(srcarg, maskarg, dstarg); + } + + size_t globalsize[2] = { (size_t)src.cols, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI }; + return k.run(2, globalsize, NULL, false); + } + else + { + UMat temp; + src.convertTo( temp, dtype, scale, delta ); + temp.copyTo( _dst, _mask ); + } + + return true; +} // ocl_normalize +#endif // HAVE_OPENCL + +void normalize(InputArray _src, InputOutputArray _dst, double a, double b, + int norm_type, int rtype, InputArray _mask) +{ + CV_INSTRUMENT_REGION(); + + double scale = 1, shift = 0; + int type = _src.type(), depth = CV_MAT_DEPTH(type); + + if( rtype < 0 ) + rtype = _dst.fixedType() ? _dst.depth() : depth; + + if( norm_type == CV_MINMAX ) + { + double smin = 0, smax = 0; + double dmin = MIN( a, b ), dmax = MAX( a, b ); + minMaxIdx( _src, &smin, &smax, 0, 0, _mask ); + scale = (dmax - dmin)*(smax - smin > DBL_EPSILON ? 1./(smax - smin) : 0); + if( rtype == CV_32F ) + { + scale = (float)scale; + shift = (float)dmin - (float)(smin*scale); + } + else + shift = dmin - smin*scale; + } + else if( norm_type == CV_L2 || norm_type == CV_L1 || norm_type == CV_C ) + { + scale = norm( _src, norm_type, _mask ); + scale = scale > DBL_EPSILON ? a/scale : 0.; + shift = 0; + } + else + CV_Error( CV_StsBadArg, "Unknown/unsupported norm type" ); + + CV_OCL_RUN(_dst.isUMat(), + ocl_normalize(_src, _dst, _mask, rtype, scale, shift)) + + Mat src = _src.getMat(); + if( _mask.empty() ) + src.convertTo( _dst, rtype, scale, shift ); + else + { + Mat temp; + src.convertTo( temp, rtype, scale, shift ); + temp.copyTo( _dst, _mask ); + } +} + +} // namespace diff --git a/modules/core/src/persistence_c.cpp b/modules/core/src/persistence_c.cpp index 9ec70190df..904164c783 100644 --- a/modules/core/src/persistence_c.cpp +++ b/modules/core/src/persistence_c.cpp @@ -1378,48 +1378,6 @@ cvTypeOf( const void* struct_ptr ) } -/* universal functions */ -CV_IMPL void -cvRelease( void** struct_ptr ) -{ - CvTypeInfo* info; - - if( !struct_ptr ) - CV_Error( CV_StsNullPtr, "NULL double pointer" ); - - if( *struct_ptr ) - { - info = cvTypeOf( *struct_ptr ); - if( !info ) - CV_Error( CV_StsError, "Unknown object type" ); - if( !info->release ) - CV_Error( CV_StsError, "release function pointer is NULL" ); - - info->release( struct_ptr ); - *struct_ptr = 0; - } -} - - -void* cvClone( const void* struct_ptr ) -{ - void* struct_copy = 0; - CvTypeInfo* info; - - if( !struct_ptr ) - CV_Error( CV_StsNullPtr, "NULL structure pointer" ); - - info = cvTypeOf( struct_ptr ); - if( !info ) - CV_Error( CV_StsError, "Unknown object type" ); - if( !info->clone ) - CV_Error( CV_StsError, "clone function pointer is NULL" ); - - struct_copy = info->clone( struct_ptr ); - return struct_copy; -} - - /* reads matrix, image, sequence, graph etc. */ CV_IMPL void* cvRead( CvFileStorage* fs, CvFileNode* node, CvAttrList* list ) diff --git a/modules/core/src/rand.cpp b/modules/core/src/rand.cpp index 8c66cdcc07..2ae5664245 100644 --- a/modules/core/src/rand.cpp +++ b/modules/core/src/rand.cpp @@ -867,6 +867,9 @@ void cv::randShuffle( InputOutputArray _dst, double iterFactor, RNG* _rng ) func( dst, rng, iterFactor ); } + +#ifndef OPENCV_EXCLUDE_C_API + CV_IMPL void cvRandArr( CvRNG* _rng, CvArr* arr, int disttype, CvScalar param1, CvScalar param2 ) { @@ -884,6 +887,9 @@ CV_IMPL void cvRandShuffle( CvArr* arr, CvRNG* _rng, double iter_factor ) cv::randShuffle( dst, iter_factor, &rng ); } +#endif // OPENCV_EXCLUDE_C_API + + // Mersenne Twister random number generator. // Inspired by http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c diff --git a/modules/core/src/stat_c.cpp b/modules/core/src/stat_c.cpp index d7355b9f94..8b6f0f09e4 100644 --- a/modules/core/src/stat_c.cpp +++ b/modules/core/src/stat_c.cpp @@ -5,6 +5,8 @@ #include "precomp.hpp" +#ifndef OPENCV_EXCLUDE_C_API + CV_IMPL CvScalar cvSum( const CvArr* srcarr ) { cv::Scalar sum = cv::sum(cv::cvarrToMat(srcarr, false, true, 1)); @@ -117,3 +119,5 @@ cvNorm( const void* imgA, const void* imgB, int normType, const void* maskarr ) return !maskarr ? cv::norm(a, b, normType) : cv::norm(a, b, normType, mask); } + +#endif // OPENCV_EXCLUDE_C_API diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp index f21cf7b7e2..936348f779 100644 --- a/modules/core/src/umatrix.cpp +++ b/modules/core/src/umatrix.cpp @@ -1259,88 +1259,6 @@ UMat UMat::t() const return m; } -UMat UMat::inv(int method) const -{ - UMat m; - invert(*this, m, method); - return m; -} - -UMat UMat::mul(InputArray m, double scale) const -{ - UMat dst; - multiply(*this, m, dst, scale); - return dst; -} - -#ifdef HAVE_OPENCL - -static bool ocl_dot( InputArray _src1, InputArray _src2, double & res ) -{ - UMat src1 = _src1.getUMat().reshape(1), src2 = _src2.getUMat().reshape(1); - - int type = src1.type(), depth = CV_MAT_DEPTH(type), - kercn = ocl::predictOptimalVectorWidth(src1, src2); - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; - - if ( !doubleSupport && depth == CV_64F ) - return false; - - int dbsize = ocl::Device::getDefault().maxComputeUnits(); - size_t wgs = ocl::Device::getDefault().maxWorkGroupSize(); - int ddepth = std::max(CV_32F, depth); - - int wgs2_aligned = 1; - while (wgs2_aligned < (int)wgs) - wgs2_aligned <<= 1; - wgs2_aligned >>= 1; - - char cvt[40]; - ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, - format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D ddepth=%d -D convertToDT=%s -D OP_DOT " - "-D WGS=%d -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d", - ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(depth), - ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)), - ddepth, ocl::convertTypeStr(depth, ddepth, kercn, cvt), - (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "", - _src1.isContinuous() ? " -D HAVE_SRC_CONT" : "", - _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", kercn)); - if (k.empty()) - return false; - - UMat db(1, dbsize, ddepth); - - ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1), - src2arg = ocl::KernelArg::ReadOnlyNoSize(src2), - dbarg = ocl::KernelArg::PtrWriteOnly(db); - - k.args(src1arg, src1.cols, (int)src1.total(), dbsize, dbarg, src2arg); - - size_t globalsize = dbsize * wgs; - if (k.run(1, &globalsize, &wgs, false)) - { - res = sum(db.getMat(ACCESS_READ))[0]; - return true; - } - return false; -} - -#endif - -double UMat::dot(InputArray m) const -{ - CV_INSTRUMENT_REGION(); - - CV_Assert(m.sameSize(*this) && m.type() == type()); - -#ifdef HAVE_OPENCL - double r = 0; - CV_OCL_RUN_(dims <= 2, ocl_dot(*this, m, r), r) -#endif - - return getMat(ACCESS_READ).dot(m); -} - UMat UMat::zeros(int rows, int cols, int type) { return UMat(rows, cols, type, Scalar::all(0)); @@ -1371,18 +1289,6 @@ UMat UMat::ones(int ndims, const int* sz, int type) return UMat(ndims, sz, type, Scalar(1)); } -UMat UMat::eye(int rows, int cols, int type) -{ - return UMat::eye(Size(cols, rows), type); -} - -UMat UMat::eye(Size size, int type) -{ - UMat m(size, type); - setIdentity(m); - return m; -} - } /* End of file. */